Chromium Code Reviews

Side by Side Diff: swarm_client/trace_inputs.py

Issue 69143004: Delete swarm_client. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/
Patch Set: Created 7 years, 1 month ago
1 #!/usr/bin/env python
2 # coding=utf-8
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
6
7 """Traces an executable and its child processes and extract the files accessed
8 by them.
9
10 The implementation uses OS-specific APIs. The native Kernel logger and the ETL
11 interface are used on Windows. Dtrace is used on OSX. Strace is used otherwise.
12 The OS-specific implementation is hidden in an 'API' interface.
13
14 The results are embedded in a Results instance. The tracing is done in two
15 phases: the first does the actual trace and generates an
16 implementation-specific log file. Then the log file is parsed to extract the
17 information from it, including the individual child processes and the files
18 they accessed.
19 """
20
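# Illustrative two-phase usage, as a rough sketch only (assumes Linux/strace;
# the paths and command below are hypothetical):
#
#   api = Strace()
#   with api.get_tracer(u'/tmp/trace.json') as tracer:
#     tracer.trace([u'/usr/bin/python', u'foo.py'], os.getcwd(), 'run1', True)
#   data = Strace.parse_log(u'/tmp/trace.json', gen_blacklist([]), None)
#   results = data[0]['results']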
21 import codecs
22 import csv
23 import errno
24 import getpass
25 import glob
26 import json
27 import logging
28 import os
29 import re
30 import stat
31 import subprocess
32 import sys
33 import tempfile
34 import threading
35 import time
36 import weakref
37
38 from third_party import colorama
39 from third_party.depot_tools import fix_encoding
40 from third_party.depot_tools import subcommand
41
42 from utils import file_path
43 from utils import tools
44
45 ## OS-specific imports
46
47 if sys.platform == 'win32':
48 from ctypes.wintypes import byref, c_int, c_wchar_p
49 from ctypes.wintypes import windll # pylint: disable=E0611
50
51
52 __version__ = '0.1'
53
54
55 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
56 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
57
58
59 class TracingFailure(Exception):
60 """An exception occured during tracing."""
61 def __init__(self, description, pid, line_number, line, *args):
62 super(TracingFailure, self).__init__(
63 description, pid, line_number, line, *args)
64 self.description = description
65 self.pid = pid
66 self.line_number = line_number
67 self.line = line
68 self.extra = args
69
70 def __str__(self):
71 out = self.description
72 if self.pid:
73 out += '\npid: %d' % self.pid
74 if self.line_number:
75 out += '\nline: %d' % self.line_number
76 if self.line:
77 out += '\n%s' % self.line
78 if self.extra:
79 out += '\n' + ', '.join(map(str, filter(None, self.extra)))
80 return out
81
82
83 ## OS-specific functions
84
85 if sys.platform == 'win32':
86 def get_current_encoding():
87 """Returns the 'ANSI' code page associated to the process."""
88 return 'cp%d' % int(windll.kernel32.GetACP())
89
90
91 def CommandLineToArgvW(command_line):
92 """Splits a commandline into argv using CommandLineToArgvW()."""
93 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
94 size = c_int()
95 assert isinstance(command_line, unicode)
96 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
97 try:
98 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
99 finally:
100 windll.kernel32.LocalFree(ptr)
101
102
103
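# Illustrative use of CommandLineToArgvW() above (Windows only); the command
# line is hypothetical and the split follows the usual Win32 quoting rules:
#   CommandLineToArgvW(u'child.exe "a b" c')  # -> [u'child.exe', u'a b', u'c']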
104 def gen_blacklist(regexes):
105 """Returns a lambda to be used as a blacklist."""
106 compiled = [re.compile(i) for i in regexes]
107 def match(f):
108 return any(j.match(f) for j in compiled)
109 return match
110
111
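# Illustrative use of gen_blacklist(); the regexes and paths are made up:
#   is_blacklisted = gen_blacklist([r'.*\.pyc$', r'.*\.swp$'])
#   is_blacklisted(u'/tmp/foo.pyc')  # -> True
#   is_blacklisted(u'/tmp/foo.py')   # -> False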
112 def create_subprocess_thunk():
113 """Creates a small temporary script to start the child process.
114
115 This thunk doesn't block; its unique name is used to identify it as the
116 parent.
117 """
118 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
119 try:
120 os.write(
121 handle,
122 (
123 'import subprocess, sys\n'
124 'sys.exit(subprocess.call(sys.argv[2:]))\n'
125 ))
126 finally:
127 os.close(handle)
128 return name
129
130
131 def create_exec_thunk():
132 """Creates a small temporary script to start the child executable.
133
134 Reads from the file handle provided as the first argument to block, then
135 execv() the command to be traced.
136 """
137 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
138 try:
139 os.write(
140 handle,
141 (
142 'import os, sys\n'
143 'fd = int(sys.argv[1])\n'
144 # This will block until the controlling process writes a byte on the
145 # pipe. It will do so once the tracing tool, e.g. strace, is ready to
146 # trace.
147 'os.read(fd, 1)\n'
148 'os.close(fd)\n'
149 'os.execv(sys.argv[2], sys.argv[2:])\n'
150 ))
151 finally:
152 os.close(handle)
153 return name
154
155
156 def strace_process_quoted_arguments(text):
157 """Extracts quoted arguments on a string and return the arguments as a list.
158
159 Implemented as an automaton. Supports incomplete strings in the form
160 '"foo"...'.
161
162 Example:
163 With text = '"foo", "bar"', the function will return ['foo', 'bar']
164
165 TODO(maruel): Implement escaping.
166 """
167 # All the possible states of the DFA.
168 ( NEED_QUOTE, # Beginning of a new argument.
169 INSIDE_STRING, # Inside an argument.
170 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is.
171 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be
172 # a series of 3 dots or a comma.
173 NEED_SPACE, # Right after a comma
174 NEED_DOT_2, # Found a dot, need a second one.
175 NEED_DOT_3, # Found second dot, need a third one.
176 NEED_COMMA, # Found third dot, need a comma.
177 ) = range(8)
178
179 state = NEED_QUOTE
180 out = []
181 for index, char in enumerate(text):
182 if char == '"':
183 if state == NEED_QUOTE:
184 state = INSIDE_STRING
185 # A new argument was found.
186 out.append('')
187 elif state == INSIDE_STRING:
188 # The argument is now closed.
189 state = NEED_COMMA_OR_DOT
190 elif state == ESCAPED:
191 out[-1] += char
192 state = INSIDE_STRING
193 else:
194 raise ValueError(
195 'Can\'t process char \'%s\' at column %d for: %r' % (
196 char, index, text),
197 index,
198 text)
199 elif char == ',':
200 if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
201 state = NEED_SPACE
202 elif state == INSIDE_STRING:
203 out[-1] += char
204 elif state == ESCAPED:
205 out[-1] += char
206 state = INSIDE_STRING
207 else:
208 raise ValueError(
209 'Can\'t process char \'%s\' at column %d for: %r' % (
210 char, index, text),
211 index,
212 text)
213 elif char == ' ':
214 if state == NEED_SPACE:
215 state = NEED_QUOTE
216 elif state == INSIDE_STRING:
217 out[-1] += char
218 elif state == ESCAPED:
219 out[-1] += char
220 state = INSIDE_STRING
221 else:
222 raise ValueError(
223 'Can\'t process char \'%s\' at column %d for: %r' % (
224 char, index, text),
225 index,
226 text)
227 elif char == '.':
228 if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
229 # The string is incomplete, this means the strace -s flag should be
230 # increased.
231 # For NEED_QUOTE, the input string would look like '"foo", ...'.
232 # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
233 state = NEED_DOT_2
234 elif state == NEED_DOT_2:
235 state = NEED_DOT_3
236 elif state == NEED_DOT_3:
237 state = NEED_COMMA
238 elif state == INSIDE_STRING:
239 out[-1] += char
240 elif state == ESCAPED:
241 out[-1] += char
242 state = INSIDE_STRING
243 else:
244 raise ValueError(
245 'Can\'t process char \'%s\' at column %d for: %r' % (
246 char, index, text),
247 index,
248 text)
249 elif char == '\\':
250 if state == ESCAPED:
251 out[-1] += char
252 state = INSIDE_STRING
253 elif state == INSIDE_STRING:
254 state = ESCAPED
255 else:
256 raise ValueError(
257 'Can\'t process char \'%s\' at column %d for: %r' % (
258 char, index, text),
259 index,
260 text)
261 else:
262 if state == INSIDE_STRING:
263 out[-1] += char
264 else:
265 raise ValueError(
266 'Can\'t process char \'%s\' at column %d for: %r' % (
267 char, index, text),
268 index,
269 text)
270 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
271 raise ValueError(
272 'String is incorrectly terminated: %r' % text,
273 text)
274 return out
275
276
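# Illustrative inputs/outputs for strace_process_quoted_arguments():
#   strace_process_quoted_arguments('"foo", "bar"')  # -> ['foo', 'bar']
#   # Strings truncated by strace's -s limit still parse:
#   strace_process_quoted_arguments('"foo"...')      # -> ['foo']
#   strace_process_quoted_arguments('"foo", ...')    # -> ['foo']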
277 def read_json(filepath):
278 with open(filepath, 'r') as f:
279 return json.load(f)
280
281
282 def write_json(filepath_or_handle, data, dense):
283 """Writes data into filepath or file handle encoded as json.
284
285 If dense is True, the json is packed. Otherwise, it is human readable.
286 """
287 if hasattr(filepath_or_handle, 'write'):
288 if dense:
289 filepath_or_handle.write(
290 json.dumps(data, sort_keys=True, separators=(',',':')))
291 else:
292 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2))
293 else:
294 with open(filepath_or_handle, 'wb') as f:
295 if dense:
296 json.dump(data, f, sort_keys=True, separators=(',',':'))
297 else:
298 json.dump(data, f, sort_keys=True, indent=2)
299
300
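# Illustrative round trip for write_json()/read_json(); the path is made up:
#   write_json(u'/tmp/data.json', {'traces': []}, False)
#   read_json(u'/tmp/data.json')  # -> {u'traces': []}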
301 def assert_is_renderable(pseudo_string):
302 """Asserts the input is a valid object to be processed by render()."""
303 assert (
304 pseudo_string is None or
305 isinstance(pseudo_string, unicode) or
306 hasattr(pseudo_string, 'render')), repr(pseudo_string)
307
308
309 def render(pseudo_string):
310 """Converts the pseudo-string to an unicode string."""
311 if pseudo_string is None or isinstance(pseudo_string, unicode):
312 return pseudo_string
313 return pseudo_string.render()
314
315
316 class Results(object):
317 """Results of a trace session."""
318
319 class _TouchedObject(object):
320 """Something, a file or a directory, that was accessed."""
321 def __init__(self, root, path, tainted, size, nb_files):
322 logging.debug(
323 '%s(%s, %s, %s, %s, %s)' %
324 (self.__class__.__name__, root, path, tainted, size, nb_files))
325 assert_is_renderable(root)
326 assert_is_renderable(path)
327 self.root = root
328 self.path = path
329 self.tainted = tainted
330 self.nb_files = nb_files
331 # Can be used as a cache or a default value, depending on context. In
332 # particular, once self.tainted is True, because the path was replaced
333 # with a variable, it is not possible to look up the file size.
334 self._size = size
335 # These are cache only.
336 self._real_path = None
337
338 # Check internal consistency.
339 assert path, path
340 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
341 assert tainted or (
342 not os.path.exists(self.full_path) or
343 (self.full_path == file_path.get_native_path_case(self.full_path))), (
344 tainted,
345 self.full_path,
346 file_path.get_native_path_case(self.full_path))
347
348 @property
349 def existent(self):
350 return self.size != -1
351
352 @property
353 def full_path(self):
354 if self.root:
355 return os.path.join(self.root, self.path)
356 return self.path
357
358 @property
359 def real_path(self):
360 """Returns the path with symlinks resolved."""
361 if not self._real_path:
362 self._real_path = os.path.realpath(self.full_path)
363 return self._real_path
364
365 @property
366 def size(self):
367 """File's size. -1 is not existent.
368
369 Once tainted, it is not possible to retrieve the file size anymore since
370 the path is composed of variables.
371 """
372 if self._size is None and not self.tainted:
373 try:
374 self._size = os.stat(self.full_path).st_size
375 except OSError:
376 self._size = -1
377 return self._size
378
379 def flatten(self):
380 """Returns a dict representing this object.
381
382 A 'size' of 0 means the file was only touched and not read.
383 """
384 return {
385 'path': self.path,
386 'size': self.size,
387 }
388
389 def replace_variables(self, variables):
390 """Replaces the root of this File with one of the variables if it matches.
391
392 If a variable replacement occurs, the cloned object becomes tainted.
393 """
394 for variable, root_path in variables.iteritems():
395 if self.path.startswith(root_path):
396 return self._clone(
397 self.root, variable + self.path[len(root_path):], True)
398 # No need to clone, returns ourself.
399 return self
400
401 def strip_root(self, root):
402 """Returns a clone of itself with 'root' stripped off.
403
404 Note that the file is kept if it is either accessible from a symlinked
405 path that was used to access the file or through the real path.
406 """
407 # Check internal consistency.
408 assert (
409 self.tainted or
410 (file_path.isabs(root) and root.endswith(os.path.sep))), root
411 if not self.full_path.startswith(root):
412 # Now try to resolve the symlinks to see if it can be reached this way.
413 # Only try *after* trying without resolving symlink.
414 if not self.real_path.startswith(root):
415 return None
416 path = self.real_path
417 else:
418 path = self.full_path
419 return self._clone(root, path[len(root):], self.tainted)
420
421 def _clone(self, new_root, new_path, tainted):
422 raise NotImplementedError(self.__class__.__name__)
423
424 class File(_TouchedObject):
425 """A file that was accessed. May not be present anymore.
426
427 If tainted is true, it means it is not a real path anymore as a variable
428 replacement occurred.
429
430 |mode| can be one of None, TOUCHED, READ or WRITE.
431 """
432 # Was probed for existence, and it exists, but was never _opened_.
433 TOUCHED = 't'
434 # Opened for read only and guaranteed to not have been written to.
435 READ = 'r'
436 # Opened for write.
437 WRITE = 'w'
438
439 # They are listed in order of priority. E.g. if a file is traced as TOUCHED
440 # then as WRITE, only keep WRITE. None means no idea, which is a problem on
441 # Windows.
442 ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)
443
444 def __init__(self, root, path, tainted, size, mode):
445 assert mode in self.ACCEPTABLE_MODES
446 super(Results.File, self).__init__(root, path, tainted, size, 1)
447 self.mode = mode
448
449 def _clone(self, new_root, new_path, tainted):
450 """Clones itself keeping meta-data."""
451 # Keep the self.size and self._real_path caches for performance reasons. It
452 # is also important when the file becomes tainted (with a variable instead
453 # of the real path) since self.path is not an on-disk path anymore so
454 # out._size cannot be updated.
455 out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
456 out._real_path = self._real_path
457 return out
458
459 def flatten(self):
460 out = super(Results.File, self).flatten()
461 out['mode'] = self.mode
462 return out
463
464 class Directory(_TouchedObject):
465 """A directory of files. Must exist.
466
467 For a Directory instance, self.size is not a cache, it's an actual value
468 that is never modified and represents the total size of the files contained
469 in this directory. It is possible that the directory is empty so that
470 size==0; this happens if there's only an invalid symlink in it.
471 """
472 def __init__(self, root, path, tainted, size, nb_files):
473 """path='.' is a valid value and must be handled appropriately."""
474 assert not path.endswith(os.path.sep), path
475 super(Results.Directory, self).__init__(
476 root, path + os.path.sep, tainted, size, nb_files)
477
478 def flatten(self):
479 out = super(Results.Directory, self).flatten()
480 out['nb_files'] = self.nb_files
481 return out
482
483 def _clone(self, new_root, new_path, tainted):
484 """Clones itself keeping meta-data."""
485 out = self.__class__(
486 new_root,
487 new_path.rstrip(os.path.sep),
488 tainted,
489 self.size,
490 self.nb_files)
491 out._real_path = self._real_path
492 return out
493
494 class Process(object):
495 """A process that was traced.
496
497 Contains references to the files accessed by this process and its children.
498 """
499 def __init__(self, pid, files, executable, command, initial_cwd, children):
500 logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
501 self.pid = pid
502 self.files = sorted(files, key=lambda x: x.path)
503 self.children = children
504 self.executable = executable
505 self.command = command
506 self.initial_cwd = initial_cwd
507
508 # Check internal consistency.
509 assert len(set(f.path for f in self.files)) == len(self.files), sorted(
510 f.path for f in self.files)
511 assert isinstance(self.children, list)
512 assert isinstance(self.files, list)
513
514 @property
515 def all(self):
516 for child in self.children:
517 for i in child.all:
518 yield i
519 yield self
520
521 def flatten(self):
522 return {
523 'children': [c.flatten() for c in self.children],
524 'command': self.command,
525 'executable': self.executable,
526 'files': [f.flatten() for f in self.files],
527 'initial_cwd': self.initial_cwd,
528 'pid': self.pid,
529 }
530
531 def strip_root(self, root):
532 assert file_path.isabs(root) and root.endswith(os.path.sep), root
533 # Loads the files after since they are constructed as objects.
534 out = self.__class__(
535 self.pid,
536 filter(None, (f.strip_root(root) for f in self.files)),
537 self.executable,
538 self.command,
539 self.initial_cwd,
540 [c.strip_root(root) for c in self.children])
541 logging.debug(
542 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
543 return out
544
545 def __init__(self, process):
546 self.process = process
547 # Cache.
548 self._files = None
549
550 def flatten(self):
551 return {
552 'root': self.process.flatten(),
553 }
554
555 @property
556 def files(self):
557 if self._files is None:
558 self._files = sorted(
559 sum((p.files for p in self.process.all), []),
560 key=lambda x: x.path)
561 return self._files
562
563 @property
564 def existent(self):
565 return [f for f in self.files if f.existent]
566
567 @property
568 def non_existent(self):
569 return [f for f in self.files if not f.existent]
570
571 def strip_root(self, root):
572 """Returns a clone with all the files outside the directory |root| removed
573 and converts all the paths to relative paths.
574
575 It keeps files accessible through the |root| directory or that have been
576 accessed through any symlink which points to the same directory.
577 """
578 # Resolve any symlink
579 root = os.path.realpath(root)
580 root = (
581 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
582 logging.debug('strip_root(%s)' % root)
583 return Results(self.process.strip_root(root))
584
585
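# For reference, Results.flatten() produces a plain dict tree shaped roughly
# like the following (all values below are illustrative):
#   {
#     'root': {
#       'children': [],
#       'command': [u'/usr/bin/python', u'foo.py'],
#       'executable': u'/usr/bin/python',
#       'files': [{'path': u'/usr/bin/python', 'size': 12345, 'mode': 'r'}],
#       'initial_cwd': u'/home/user',
#       'pid': 123,
#     },
#   }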
586 class ApiBase(object):
587 """OS-agnostic API to trace a process and its children."""
588 class Context(object):
589 """Processes one log line at a time and keeps the list of traced processes.
590
591 The parsing is complicated by the fact that logs are traced out of order for
592 strace but in-order for dtrace and logman. In addition, on Windows process
593 ids are frequently reused, so a flat list cannot be used. But at the same
594 time, it is impossible to faithfully construct a graph when the logs are
595 processed out of order. So both a tree and a flat mapping are used: the tree
596 is the real process tree, while the flat mapping stores the last valid
597 process for the corresponding process id. For the strace case, the tree's
598 head is guessed at the last moment.
599 """
600 class Process(object):
601 """Keeps context for one traced child process.
602
603 Logs all the files this process touched. Ignores directories.
604 """
605 def __init__(self, blacklist, pid, initial_cwd):
606 # Check internal consistency.
607 assert isinstance(pid, int), repr(pid)
608 assert_is_renderable(initial_cwd)
609 self.pid = pid
610 # children are Process instances.
611 self.children = []
612 self.initial_cwd = initial_cwd
613 self.cwd = None
614 self.files = {}
615 self.executable = None
616 self.command = None
617 self._blacklist = blacklist
618
619 def to_results_process(self):
620 """Resolves file case sensitivity and or late-bound strings."""
621 # When resolving files, it's normal to get dupe because a file could be
622 # opened multiple times with different case. Resolve the deduplication
623 # here.
624 def fix_path(x):
625 """Returns the native file path case.
626
627 Converts late-bound strings.
628 """
629 if not x:
630 # Do not convert None instance to 'None'.
631 return x
632 x = render(x)
633 if os.path.isabs(x):
634 # If the path is not absolute, which tends to happen occasionally on
635 # Windows, it is not possible to get the native path case so ignore
636 # that trace. It mostly happens for the 'executable' value.
637 x = file_path.get_native_path_case(x)
638 return x
639
640 def fix_and_blacklist_path(x, m):
641 """Receives a tuple (filepath, mode) and processes filepath."""
642 x = fix_path(x)
643 if not x:
644 return
645 # The blacklist needs to be reapplied, since path casing could
646 # influence blacklisting.
647 if self._blacklist(x):
648 return
649 # Filters out directories. Some may have passed through.
650 if os.path.isdir(x):
651 return
652 return x, m
653
654 # Renders all the files as strings, as some could be RelativePath
655 # instances. It is important to do it first since there could still be
656 # multiple entries with the same path but different modes.
657 rendered = (
658 fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
659 files = sorted(
660 (f for f in rendered if f),
661 key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
662 # Then converting into a dict will automatically clean up the less
663 # important values.
664 files = [
665 Results.File(None, f, False, None, m)
666 for f, m in dict(files).iteritems()
667 ]
668 return Results.Process(
669 self.pid,
670 files,
671 fix_path(self.executable),
672 self.command,
673 fix_path(self.initial_cwd),
674 [c.to_results_process() for c in self.children])
675
676 def add_file(self, filepath, mode):
677 """Adds a file if it passes the blacklist."""
678 if self._blacklist(render(filepath)):
679 return
680 logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
681 # Note that filepath and not render(filepath) is added. It is because
682 # filepath could be something other than a string, like a RelativePath
683 # instance for dtrace logs.
684 modes = Results.File.ACCEPTABLE_MODES
685 old_mode = self.files.setdefault(filepath, mode)
686 if old_mode != mode and modes.index(old_mode) < modes.index(mode):
687 # Take the highest value.
688 self.files[filepath] = mode
689
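# Illustrative behaviour of add_file(): ACCEPTABLE_MODES is ordered by
# priority, so the strongest mode seen for a path wins. With a hypothetical
# Context.Process instance p:
#   p.add_file(u'/tmp/x', Results.File.TOUCHED)
#   p.add_file(u'/tmp/x', Results.File.WRITE)  # upgraded to WRITE
#   p.add_file(u'/tmp/x', Results.File.READ)   # stays WRITE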
690 def __init__(self, blacklist):
691 self.blacklist = blacklist
692 # Initial process.
693 self.root_process = None
694 # dict to accelerate process lookup, so the whole graph doesn't have to be
695 # walked each time.
696 self._process_lookup = {}
697
698 class Tracer(object):
699 """During it's lifetime, the tracing subsystem is enabled."""
700 def __init__(self, logname):
701 self._logname = logname
702 self._lock = threading.RLock()
703 self._traces = []
704 self._initialized = True
705 self._scripts_to_cleanup = []
706
707 def trace(self, cmd, cwd, tracename, output):
708 """Runs the OS-specific trace program on an executable.
709
710 Arguments:
711 - cmd: The command (a list) to run.
712 - cwd: Current directory to start the child process in.
713 - tracename: Name of the trace in the logname file.
714 - output: If False, redirects output to PIPEs.
715
716 Returns a tuple (resultcode, output) and updates the internal trace
717 entries.
718 """
719 # The implementation adds an item to self._traces.
720 raise NotImplementedError(self.__class__.__name__)
721
722 def close(self, _timeout=None):
723 """Saves the meta-data in the logname file.
724
725 For kernel-based tracing, stops the tracing subsystem.
726
727 Must not be used manually when using the 'with' construct.
728 """
729 with self._lock:
730 if not self._initialized:
731 raise TracingFailure(
732 'Called %s.close() on an uninitialized object' %
733 self.__class__.__name__,
734 None, None, None)
735 try:
736 while self._scripts_to_cleanup:
737 try:
738 os.remove(self._scripts_to_cleanup.pop())
739 except OSError as e:
740 logging.error('Failed to delete a temporary script: %s', e)
741 write_json(self._logname, self._gen_logdata(), False)
742 finally:
743 self._initialized = False
744
745 def post_process_log(self):
746 """Post-processes the log so it becomes faster to load afterward.
747
748 Must not be used manually when using the 'with' construct.
749 """
750 assert not self._initialized, 'Must stop tracing first.'
751
752 def _gen_logdata(self):
753 """Returns the data to be saved in the trace file."""
754 return {
755 'traces': self._traces,
756 }
757
758 def __enter__(self):
759 """Enables 'with' statement."""
760 return self
761
762 def __exit__(self, exc_type, exc_value, traceback):
763 """Enables 'with' statement."""
764 self.close()
765 # If an exception was thrown, do not process logs.
766 if not exc_type:
767 self.post_process_log()
768
769 def get_tracer(self, logname):
770 """Returns an ApiBase.Tracer instance.
771
772 Initializes the tracing subsystem, which is a requirement for kernel-based
773 tracers. Only one tracer instance should be live at a time!
774
775 logname is the filepath to the json file that will contain the meta-data
776 about the logs.
777 """
778 return self.Tracer(logname)
779
780 @staticmethod
781 def clean_trace(logname):
782 """Deletes an old log."""
783 raise NotImplementedError()
784
785 @classmethod
786 def parse_log(cls, logname, blacklist, trace_name):
787 """Processes trace logs and returns the files opened and the files that do
788 not exist.
789
790 It does not track directories.
791
792 Arguments:
793 - logname: must be an absolute path.
794 - blacklist: must be a lambda.
795 - trace_name: optional trace to read, defaults to reading all traces.
796
797 Most of the time, files that do not exist are temporary test files that
798 should be put in /tmp instead. See http://crbug.com/116251.
799
800 Returns a list of dict with keys:
801 - results: A Results instance.
802 - trace: The corresponding tracename parameter provided to
803 get_tracer().trace().
804 - output: Output gathered during execution, if get_tracer().trace(...,
805 output=False) was used.
806 """
807 raise NotImplementedError(cls.__name__)
808
809
810 class Strace(ApiBase):
811 """strace implies linux."""
812 @staticmethod
813 def load_filename(filename):
814 """Parses a filename in a log."""
815 # TODO(maruel): Be compatible with strace -x.
816 assert isinstance(filename, str)
817 out = ''
818 i = 0
819 while i < len(filename):
820 c = filename[i]
821 if c == '\\':
822 out += chr(int(filename[i+1:i+4], 8))
823 i += 4
824 else:
825 out += c
826 i += 1
827 # TODO(maruel): It's not necessarily true that the current code page is
828 # utf-8.
829 return out.decode('utf-8')
830
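# Illustrative call to load_filename(); strace escapes non-ASCII bytes as
# octal, so (hypothetical path):
#   Strace.load_filename('/tmp/caf\\303\\251')  # -> u'/tmp/caf\xe9' (u'café')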
831 class Context(ApiBase.Context):
832 """Processes a strace log line and keeps the list of existent and non
833 existent files accessed.
834
835 Ignores directories.
836
837 Uses late-binding to process the cwd of each process. The problem is that
838 strace generates one log file per process it traced but doesn't give any
839 information about which process was started when and by whom. So process the
840 logs out of order and use late binding with RelativePath to be able to
841 deduce the initial directory of each process once all the logs are parsed.
842
843 TODO(maruel): Use the script even in the non-sudo case, so log parsing can
844 be done in two phases: first find the root process, then process the child
845 processes in order. With that, it should be possible to not use RelativePath
846 anymore. This would significantly simplify the code!
847 """
848 class Process(ApiBase.Context.Process):
849 """Represents the state of a process.
850
851 Contains all the information retrieved from the pid-specific log.
852 """
853 # Function names are using ([a-z_0-9]+)
854 # This is the most common format. function(args) = result
855 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
856 # An interrupted function call, only grab the minimal header.
857 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
858 # A resumed function call.
859 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
860 # A process received a signal.
861 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
862 # A process didn't handle a signal. Ignore any junk appearing before,
863 # because the process was forcibly killed so it won't open any new file.
864 RE_KILLED = re.compile(
865 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
866 # The process has exited.
867 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
868 # A call was canceled. Ignore any prefix.
869 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
870 # Happens when strace fails to even get the function name.
871 UNNAMED_FUNCTION = '????'
872
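# Illustrative strace output lines matched by the regexps above:
#   open("/etc/ld.so.cache", O_RDONLY) = 3       matches RE_HEADER
#   wait4(-1,  <unfinished ...>                  matches RE_UNFINISHED
#   <... wait4 resumed> NULL, 0, NULL) = 4289    matches RE_RESUMED
#   +++ exited with 0 +++                        matches RE_PROCESS_EXITED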
873 # Corner-case in python: a class member function decorator must not be
874 # @staticmethod.
875 def parse_args(regexp, expect_zero): # pylint: disable=E0213
876 """Automatically convert the str 'args' into a list of processed
877 arguments.
878
879 Arguments:
880 - regexp is used to parse args.
881 - expect_zero: one of True, False or None.
882 - True: will check for result.startswith('0') first and will ignore
883 the trace line completely otherwise. This is important because for
884 many functions, the regexp will not match if the call failed.
885 - False: will check for not result.startswith(('?', '-1')) for the
886 same reason as with True.
887 - None: ignore result.
888 """
889 def meta_hook(function):
890 assert function.__name__.startswith('handle_')
891 def hook(self, args, result):
892 if expect_zero is True and not result.startswith('0'):
893 return
894 if expect_zero is False and result.startswith(('?', '-1')):
895 return
896 match = re.match(regexp, args)
897 if not match:
898 raise TracingFailure(
899 'Failed to parse %s(%s) = %s' %
900 (function.__name__[len('handle_'):], args, result),
901 None, None, None)
902 return function(self, match.groups(), result)
903 return hook
904 return meta_hook
905
906 class RelativePath(object):
907 """A late-bound relative path."""
908 def __init__(self, parent, value):
909 assert_is_renderable(parent)
910 self.parent = parent
911 assert (
912 value is None or
913 (isinstance(value, unicode) and not os.path.isabs(value)))
914 self.value = value
915 if self.value:
916 # TODO(maruel): On POSIX, '\\' is a valid character so remove this
917 # assert.
918 assert '\\' not in self.value, value
919 assert '\\' not in self.value, (repr(value), repr(self.value))
920
921 def render(self):
922 """Returns the current directory this instance is representing.
923
924 This function is used to return the late-bound value.
925 """
926 assert self.parent is not None
927 parent = render(self.parent)
928 if self.value:
929 return os.path.normpath(os.path.join(parent, self.value))
930 return parent
931
932 def __init__(self, root, pid):
933 """Keeps enough information to be able to guess the original process
934 root.
935
936 strace doesn't store which process was the initial process. So more
937 information needs to be kept so the graph can be reconstructed from the
938 flat map.
939 """
940 logging.info('%s(%d)' % (self.__class__.__name__, pid))
941 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None)
942 assert isinstance(root, ApiBase.Context)
943 self._root = weakref.ref(root)
944 # The dict key is the function name of the pending call, like 'open'
945 # or 'execve'.
946 self._pending_calls = {}
947 self._line_number = 0
948 # Current directory when the process started.
949 if isinstance(self._root(), unicode):
950 self.initial_cwd = self._root()
951 else:
952 self.initial_cwd = self.RelativePath(self._root(), None)
953 self.parentid = None
954 self._done = False
955
956 def get_cwd(self):
957 """Returns the best known value of cwd."""
958 return self.cwd or self.initial_cwd
959
960 def render(self):
961 """Returns the string value of the RelativePath() object.
962
963 Used by RelativePath. Returns the initial directory and not the
964 current one since the current directory 'cwd' validity is time-limited.
965
966 The validity is only guaranteed once all the logs are processed.
967 """
968 return self.initial_cwd.render()
969
970 def on_line(self, line):
971 assert isinstance(line, str)
972 self._line_number += 1
973 try:
974 if self._done:
975 raise TracingFailure(
976 'Found a trace for a terminated process or corrupted log',
977 None, None, None)
978
979 if self.RE_SIGNAL.match(line):
980 # Ignore signals.
981 return
982
983 match = self.RE_KILLED.match(line)
984 if match:
985 # Converts a '+++ killed by Foo +++' trace into an exit_group().
986 self.handle_exit_group(match.group(1), None)
987 return
988
989 match = self.RE_PROCESS_EXITED.match(line)
990 if match:
991 # Converts a '+++ exited with 1 +++' trace into an exit_group()
992 self.handle_exit_group(match.group(1), None)
993 return
994
995 match = self.RE_UNFINISHED.match(line)
996 if match:
997 if match.group(1) in self._pending_calls:
998 raise TracingFailure(
999 'Found two unfinished calls for the same function',
1000 None, None, None,
1001 self._pending_calls)
1002 self._pending_calls[match.group(1)] = (
1003 match.group(1) + match.group(2))
1004 return
1005
1006 match = self.RE_UNAVAILABLE.match(line)
1007 if match:
1008 # This usually means a process was killed and a pending call was
1009 # canceled.
1010 # TODO(maruel): Look up the last exit_group() trace just above and
1011 # make sure any self._pending_calls[anything] is properly flushed.
1012 return
1013
1014 match = self.RE_RESUMED.match(line)
1015 if match:
1016 if match.group(1) not in self._pending_calls:
1017 raise TracingFailure(
1018 'Found a resumed call that was not logged as unfinished',
1019 None, None, None,
1020 self._pending_calls)
1021 pending = self._pending_calls.pop(match.group(1))
1022 # Reconstruct the line.
1023 line = pending + match.group(2)
1024
1025 match = self.RE_HEADER.match(line)
1026 if not match:
1027 # The line is corrupted. It happens occasionally when a process is
1028 # killed forcibly with activity going on. Assume the process died.
1029 # No other line can be processed afterward.
1030 logging.debug('%d is done: %s', self.pid, line)
1031 self._done = True
1032 return
1033
1034 if match.group(1) == self.UNNAMED_FUNCTION:
1035 return
1036
1037 # It's a valid line, handle it.
1038 handler = getattr(self, 'handle_%s' % match.group(1), None)
1039 if not handler:
1040 self._handle_unknown(match.group(1), match.group(2), match.group(3))
1041 return handler(match.group(2), match.group(3))
1042 except TracingFailure, e:
1043 # Hack in the values since the handler could be a static function.
1044 e.pid = self.pid
1045 e.line = line
1046 e.line_number = self._line_number
1047 # Re-raise the modified exception.
1048 raise
1049 except (KeyError, NotImplementedError, ValueError), e:
1050 raise TracingFailure(
1051 'Trace generated a %s exception: %s' % (
1052 e.__class__.__name__, str(e)),
1053 self.pid,
1054 self._line_number,
1055 line,
1056 e)
1057
1058 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True)
1059 def handle_access(self, args, _result):
1060 self._handle_file(args[0], Results.File.TOUCHED)
1061
1062 @parse_args(r'^\"(.+?)\"$', True)
1063 def handle_chdir(self, args, _result):
1064 """Updates cwd."""
1065 self.cwd = self._mangle(args[0])
1066 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd))
1067
1068 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False)
1069 def handle_chown(self, args, _result):
1070 # TODO(maruel): Look at result?
1071 self._handle_file(args[0], Results.File.WRITE)
1072
1073 def handle_clone(self, _args, result):
1074 self._handling_forking('clone', result)
1075
1076 def handle_close(self, _args, _result):
1077 pass
1078
1079 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1080 def handle_chmod(self, args, _result):
1081 self._handle_file(args[0], Results.File.WRITE)
1082
1083 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1084 def handle_creat(self, args, _result):
1085 self._handle_file(args[0], Results.File.WRITE)
1086
1087 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True)
1088 def handle_execve(self, args, _result):
1089 # Even if in practice execve() doesn't return when it succeeds, strace
1090 # still prints '0' as the result.
1091 filepath = args[0]
1092 self._handle_file(filepath, Results.File.READ)
1093 self.executable = self._mangle(filepath)
1094 try:
1095 self.command = strace_process_quoted_arguments(args[1])
1096 except ValueError as e:
1097 raise TracingFailure(
1098 'Failed to process command line argument:\n%s' % e.args[0],
1099 None, None, None)
1100
1101 def handle_exit_group(self, _args, _result):
1102 """Removes cwd."""
1103 self.cwd = None
1104
1105 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True)
1106 def handle_faccessat(self, args, _results):
1107 if args[0] == 'AT_FDCWD':
1108 self._handle_file(args[1], Results.File.TOUCHED)
1109 else:
1110 raise Exception('Relative faccess not implemented.')
1111
1112 def handle_fallocate(self, _args, result):
1113 pass
1114
1115 def handle_fork(self, args, result):
1116 self._handle_unknown('fork', args, result)
1117
1118 def handle_futex(self, _args, _result):
1119 pass
1120
1121 @parse_args(r'^\"(.+?)\", (\d+)$', False)
1122 def handle_getcwd(self, args, _result):
1123 if os.path.isabs(args[0]):
1124 logging.debug('handle_getcwd(%d, %s)' % (self.pid, self.cwd))
1125 if not isinstance(self.cwd, unicode):
1126 # Take the opportunity to reset the path.
1127 self.cwd = self._mangle(args[0])
1128 else:
1129 # It should always match.
1130 assert self.cwd == Strace.load_filename(args[0]), (
1131 self.cwd, args[0])
1132
1133 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1134 def handle_link(self, args, _result):
1135 self._handle_file(args[0], Results.File.READ)
1136 self._handle_file(args[1], Results.File.WRITE)
1137
1138 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1139 def handle_lstat(self, args, _result):
1140 self._handle_file(args[0], Results.File.TOUCHED)
1141
1142 def handle_mkdir(self, _args, _result):
1143 # We track content, not directories.
1144 pass
1145
1146 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1147 def handle_open(self, args, _result):
1148 if 'O_DIRECTORY' in args[1]:
1149 return
1150 self._handle_file(
1151 args[0],
1152 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE)
1153
1154 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False)
1155 def handle_openat(self, args, _result):
1156 if 'O_DIRECTORY' in args[2]:
1157 return
1158 if args[0] == 'AT_FDCWD':
1159 self._handle_file(
1160 args[1],
1161 Results.File.READ if 'O_RDONLY' in args[2]
1162 else Results.File.WRITE)
1163 else:
1164 # TODO(maruel): Implement relative open if necessary instead of the
1165 # AT_FDCWD flag, let's hope not since this means tracking all active
1166 # directory handles.
1167 raise NotImplementedError('Relative open via openat not implemented.')
1168
1169 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False)
1170 def handle_readlink(self, args, _result):
1171 self._handle_file(args[0], Results.File.READ)
1172
1173 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1174 def handle_rename(self, args, _result):
1175 self._handle_file(args[0], Results.File.READ)
1176 self._handle_file(args[1], Results.File.WRITE)
1177
1178 def handle_rmdir(self, _args, _result):
1179 # We track content, not directories.
1180 pass
1181
1182 def handle_setxattr(self, _args, _result):
1183 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE)
1184 pass
1185
1186 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True)
1187 def handle_stat(self, args, _result):
1188 self._handle_file(args[0], Results.File.TOUCHED)
1189
1190 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True)
1191 def handle_symlink(self, args, _result):
1192 self._handle_file(args[0], Results.File.TOUCHED)
1193 self._handle_file(args[1], Results.File.WRITE)
1194
1195 @parse_args(r'^\"(.+?)\", \d+', True)
1196 def handle_truncate(self, args, _result):
1197 self._handle_file(args[0], Results.File.WRITE)
1198
1199 def handle_unlink(self, _args, _result):
1200 # In theory, the file had to be created anyway.
1201 pass
1202
1203 def handle_unlinkat(self, _args, _result):
1204 # In theory, the file had to be created anyway.
1205 pass
1206
1207 def handle_statfs(self, _args, _result):
1208 pass
1209
1210 def handle_utimensat(self, _args, _result):
1211 pass
1212
1213 def handle_vfork(self, _args, result):
1214 self._handling_forking('vfork', result)
1215
1216 @staticmethod
1217 def _handle_unknown(function, args, result):
1218 raise TracingFailure(
1219 'Unexpected/unimplemented trace %s(%s)= %s' %
1220 (function, args, result),
1221 None, None, None)
1222
1223 def _handling_forking(self, name, result):
1224 """Transfers cwd."""
1225 if result.startswith(('?', '-1')):
1226 # The call failed.
1227 return
1228 # Update the other process right away.
1229 childpid = int(result)
1230 child = self._root().get_or_set_proc(childpid)
1231 if child.parentid is not None or childpid in self.children:
1232 raise TracingFailure(
1233 'Found internal inconsistency in process lifetime detection '
1234 'during a %s() call' % name,
1235 None, None, None)
1236
1237 # Copy the cwd object.
1238 child.initial_cwd = self.get_cwd()
1239 child.parentid = self.pid
1240 # It is necessary because the logs are processed out of order.
1241 self.children.append(child)
1242
1243 def _handle_file(self, filepath, mode):
1244 filepath = self._mangle(filepath)
1245 self.add_file(filepath, mode)
1246
1247 def _mangle(self, filepath):
1248 """Decodes a filepath found in the log and convert it to a late-bound
1249 path if necessary.
1250
1251 |filepath| is a strace 'encoded' string and the returned value is
1252 either a unicode string if the path was absolute or a late-bound path
1253 otherwise.
1254 """
1255 filepath = Strace.load_filename(filepath)
1256 if os.path.isabs(filepath):
1257 return filepath
1258 else:
1259 if isinstance(self.get_cwd(), unicode):
1260 return os.path.normpath(os.path.join(self.get_cwd(), filepath))
1261 return self.RelativePath(self.get_cwd(), filepath)
1262
1263 def __init__(self, blacklist, root_pid, initial_cwd):
1264 """|root_pid| may be None when the root process is not known.
1265
1266 In that case, a search is done after reading all the logs to figure out
1267 the root process.
1268 """
1269 super(Strace.Context, self).__init__(blacklist)
1270 assert_is_renderable(initial_cwd)
1271 self.root_pid = root_pid
1272 self.initial_cwd = initial_cwd
1273
1274 def render(self):
1275 """Returns the string value of the initial cwd of the root process.
1276
1277 Used by RelativePath.
1278 """
1279 return self.initial_cwd
1280
1281 def on_line(self, pid, line):
1282 """Transfers control into the Process.on_line() function."""
1283 self.get_or_set_proc(pid).on_line(line.strip())
1284
1285 def to_results(self):
1286 """If necessary, finds back the root process and verify consistency."""
1287 if not self.root_pid:
1288 # The non-sudo case. The traced process was started by strace itself,
1289 # so the pid of the traced process is not known.
1290 root = [p for p in self._process_lookup.itervalues() if not p.parentid]
1291 if len(root) == 1:
1292 self.root_process = root[0]
1293 # Save it for later.
1294 self.root_pid = self.root_process.pid
1295 else:
1296 # The sudo case. The traced process was started manually so its pid is
1297 # known.
1298 self.root_process = self._process_lookup.get(self.root_pid)
1299 if not self.root_process:
1300 raise TracingFailure(
1301 'Found internal inconsistency in process lifetime detection '
1302 'while finding the root process',
1303 None,
1304 None,
1305 None,
1306 self.root_pid,
1307 sorted(self._process_lookup))
1308 process = self.root_process.to_results_process()
1309 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1310 raise TracingFailure(
1311 'Found internal inconsistency in process lifetime detection '
1312 'while looking for len(tree) == len(list)',
1313 None,
1314 None,
1315 None,
1316 sorted(self._process_lookup),
1317 sorted(p.pid for p in process.all))
1318 return Results(process)
1319
1320 def get_or_set_proc(self, pid):
1321 """Returns the Context.Process instance for this pid or creates a new one.
1322 """
1323 if not pid or not isinstance(pid, int):
1324 raise TracingFailure(
1325 'Unexpected value for pid: %r' % pid,
1326 pid,
1327 None,
1328 None,
1329 pid)
1330 if pid not in self._process_lookup:
1331 self._process_lookup[pid] = self.Process(self, pid)
1332 return self._process_lookup[pid]
1333
1334 @classmethod
1335 def traces(cls):
1336 """Returns the list of all handled traces to pass this as an argument to
1337 strace.
1338 """
1339 prefix = 'handle_'
1340 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)]
1341
1342 class Tracer(ApiBase.Tracer):
1343 MAX_LEN = 256
1344
1345 def __init__(self, logname, use_sudo):
1346 super(Strace.Tracer, self).__init__(logname)
1347 self.use_sudo = use_sudo
1348 if use_sudo:
1349 # TODO(maruel): Use the jump script systematically to make it easy to
1350 # figure out the root process, so RelativePath is not necessary anymore.
1351 self._child_script = create_exec_thunk()
1352 self._scripts_to_cleanup.append(self._child_script)
1353
1354 def trace(self, cmd, cwd, tracename, output):
1355 """Runs strace on an executable.
1356
1357 When use_sudo=True, it is a 3-phase process: start the thunk, start
1358 sudo strace with the pid of the thunk and then have the thunk os.execve()
1359 the process to trace.
1360 """
1361 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
1362 assert os.path.isabs(cmd[0]), cmd[0]
1363 assert os.path.isabs(cwd), cwd
1364 assert os.path.normpath(cwd) == cwd, cwd
1365 with self._lock:
1366 if not self._initialized:
1367 raise TracingFailure(
1368 'Called Tracer.trace() on an uninitialized object',
1369 None, None, None, tracename)
1370 assert tracename not in (i['trace'] for i in self._traces)
1371 stdout = stderr = None
1372 if output:
1373 stdout = subprocess.PIPE
1374 stderr = subprocess.STDOUT
1375
1376 # Ensure all file related APIs are hooked.
1377 traces = ','.join(Strace.Context.traces() + ['file'])
1378 flags = [
1379 # Each child process has its own trace file. It is necessary because
1380 # strace may generate a corrupted log file if multiple processes are
1381 # heavily doing syscalls simultaneously.
1382 '-ff',
1383 # Reduce whitespace usage.
1384 '-a1',
1385 # hex encode non-ascii strings.
1386 # TODO(maruel): '-x',
1387 # TODO(maruel): '-ttt',
1388 # Signals are unnecessary noise here. Note the parser can cope with them
1389 # but this reduces the unnecessary output.
1390 '-esignal=none',
1391 # Print as much data as wanted.
1392 '-s', '%d' % self.MAX_LEN,
1393 '-e', 'trace=%s' % traces,
1394 '-o', self._logname + '.' + tracename,
1395 ]
1396
1397 if self.use_sudo:
1398 pipe_r, pipe_w = os.pipe()
1399 # Start the child process paused.
1400 target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
1401 logging.debug(' '.join(target_cmd))
1402 child_proc = subprocess.Popen(
1403 target_cmd,
1404 stdin=subprocess.PIPE,
1405 stdout=stdout,
1406 stderr=stderr,
1407 cwd=cwd)
1408
1409 # TODO(maruel): both processes must use the same UID for it to work
1410 # without sudo. Look why -p is failing at the moment without sudo.
1411 trace_cmd = [
1412 'sudo',
1413 'strace',
1414 '-p', str(child_proc.pid),
1415 ] + flags
1416 logging.debug(' '.join(trace_cmd))
1417 strace_proc = subprocess.Popen(
1418 trace_cmd,
1419 cwd=cwd,
1420 stdin=subprocess.PIPE,
1421 stdout=subprocess.PIPE,
1422 stderr=subprocess.PIPE)
1423
1424 line = strace_proc.stderr.readline()
1425 if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
1426 # TODO(maruel): Raise an exception.
1427 assert False, line
1428
1429 # Now fire the child process.
1430 os.write(pipe_w, 'x')
1431
1432 out = child_proc.communicate()[0]
1433 strace_out = strace_proc.communicate()[0]
1434
1435 # TODO(maruel): if strace_proc.returncode: Add an exception.
1436 saved_out = strace_out if strace_proc.returncode else out
1437 root_pid = child_proc.pid
1438 else:
1439 # Non-sudo case.
1440 trace_cmd = [
1441 'strace',
1442 ] + flags + cmd
1443 logging.debug(' '.join(trace_cmd))
1444 child_proc = subprocess.Popen(
1445 trace_cmd,
1446 cwd=cwd,
1447 stdin=subprocess.PIPE,
1448 stdout=stdout,
1449 stderr=stderr)
1450 out = child_proc.communicate()[0]
1451 # TODO(maruel): Walking the logs to figure out the root process would
1452 # simplify parsing the logs a *lot*.
1453 saved_out = out
1454 # The trace reader will have to figure it out.
1455 root_pid = None
1456
1457 with self._lock:
1458 assert tracename not in (i['trace'] for i in self._traces)
1459 self._traces.append(
1460 {
1461 'cmd': cmd,
1462 'cwd': cwd,
1463 'output': saved_out,
1464 'pid': root_pid,
1465 'trace': tracename,
1466 })
1467 return child_proc.returncode, out
1468
1469 def __init__(self, use_sudo=None):
1470 super(Strace, self).__init__()
1471 self.use_sudo = use_sudo
1472
1473 def get_tracer(self, logname):
1474 return self.Tracer(logname, self.use_sudo)
1475
1476 @staticmethod
1477 def clean_trace(logname):
1478 if os.path.isfile(logname):
1479 os.remove(logname)
1480 # Also delete any pid specific file from previous traces.
1481 for i in glob.iglob(logname + '.*'):
1482 if i.rsplit('.', 1)[1].isdigit():
1483 os.remove(i)
1484
1485 @classmethod
1486 def parse_log(cls, logname, blacklist, trace_name):
1487 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
1488 assert os.path.isabs(logname)
1489 data = read_json(logname)
1490 out = []
1491 for item in data['traces']:
1492 if trace_name and item['trace'] != trace_name:
1493 continue
1494 result = {
1495 'output': item['output'],
1496 'trace': item['trace'],
1497 }
1498 try:
1499 context = cls.Context(blacklist, item['pid'], item['cwd'])
1500 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
1501 logging.debug('Reading %s', pidfile)
1502 pid = pidfile.rsplit('.', 1)[1]
1503 if pid.isdigit():
1504 pid = int(pid)
1505 found_line = False
1506 for line in open(pidfile, 'rb'):
1507 context.on_line(pid, line)
1508 found_line = True
1509 if not found_line:
1510 # Ensures that a completely empty trace still creates the
1511 # corresponding Process instance by logging a dummy line.
1512 context.on_line(pid, '')
1513 else:
1514 logging.warning('Found unexpected file %s', pidfile)
1515 result['results'] = context.to_results()
1516 except TracingFailure:
1517 result['exception'] = sys.exc_info()
1518 out.append(result)
1519 return out
1520
1521
1522 class Dtrace(ApiBase):
1523 """Uses DTrace framework through dtrace. Requires root access.
1524
1525 Implies Mac OSX.
1526
1527 dtruss can't be used because it has compatibility issues with python.
1528
1529 Also, the pid->cwd handling needs to be done manually since OSX has no way to
1530 get the absolute path of the 'cwd' dtrace variable from the probe.
1531
1532 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see
1533 https://discussions.apple.com/thread/1980539. So resort to handling execve()
1534 manually.
1535
1536 errno is not printed in the log since this implementation currently only cares
1537 about files that were successfully opened.
1538 """
1539 class Context(ApiBase.Context):
1540 # Format: index pid function(args)
1541 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$')
1542
1543 # Arguments parsing.
1544 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$')
1545 RE_CHDIR = re.compile(r'^\"(.+?)\"$')
1546 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$')
1547 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$')
1548 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$')
1549 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
1550
1551 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows.
1552 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None
1553 O_RDWR = os.O_RDWR
1554 O_WRONLY = os.O_WRONLY
1555
1556 class Process(ApiBase.Context.Process):
1557 def __init__(self, *args):
1558 super(Dtrace.Context.Process, self).__init__(*args)
1559 self.cwd = self.initial_cwd
1560
1561 def __init__(self, blacklist, thunk_pid, initial_cwd):
1562 logging.info(
1563 '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
1564 super(Dtrace.Context, self).__init__(blacklist)
1565 assert isinstance(initial_cwd, unicode), initial_cwd
1566 # Process ID of the temporary script created by create_subprocess_thunk().
1567 self._thunk_pid = thunk_pid
1568 self._initial_cwd = initial_cwd
1569 self._line_number = 0
1570
1571 def on_line(self, line):
1572 assert isinstance(line, unicode), line
1573 self._line_number += 1
1574 match = self.RE_HEADER.match(line)
1575 if not match:
1576 raise TracingFailure(
1577 'Found malformed line: %s' % line,
1578 None,
1579 self._line_number,
1580 line)
1581 fn = getattr(
1582 self,
1583 'handle_%s' % match.group(2).replace('-', '_'),
1584 self._handle_ignored)
1585 # It is guaranteed to succeed because of the regexp. Or at least I thought
1586 # it would.
1587 pid = int(match.group(1))
1588 try:
1589 return fn(pid, match.group(3))
1590 except TracingFailure, e:
1591 # Hack in the values since the handler could be a static function.
1592 e.pid = pid
1593 e.line = line
1594 e.line_number = self._line_number
1595 # Re-raise the modified exception.
1596 raise
1597 except (KeyError, NotImplementedError, ValueError), e:
1598 raise TracingFailure(
1599 'Trace generated a %s exception: %s' % (
1600 e.__class__.__name__, str(e)),
1601 pid,
1602 self._line_number,
1603 line,
1604 e)
1605
1606 def to_results(self):
1607 process = self.root_process.to_results_process()
1608 # Internal consistency check.
1609 if sorted(self._process_lookup) != sorted(p.pid for p in process.all):
1610 raise TracingFailure(
1611 'Found internal inconsistency in process lifetime detection '
1612 'while looking for len(tree) == len(list)',
1613 None,
1614 None,
1615 None,
1616 sorted(self._process_lookup),
1617 sorted(p.pid for p in process.all))
1618 return Results(process)
1619
1620 def handle_dtrace_BEGIN(self, _pid, args):
1621 if not self.RE_DTRACE_BEGIN.match(args):
1622 raise TracingFailure(
1623 'Found internal inconsistency in dtrace_BEGIN log line',
1624 None, None, None)
1625
1626 def handle_proc_start(self, pid, args):
1627 """Transfers cwd.
1628
1629 The dtrace script already takes care of only tracing the processes that
1630 are children of the traced processes so there is no need to verify the
1631 process hierarchy.
1632 """
1633 if pid in self._process_lookup:
1634 raise TracingFailure(
1635 'Found internal inconsistency in proc_start: %d started two times' %
1636 pid,
1637 None, None, None)
1638 match = self.RE_PROC_START.match(args)
1639 if not match:
1640 raise TracingFailure(
1641 'Failed to parse arguments: %s' % args,
1642 None, None, None)
1643 ppid = int(match.group(1))
1644 if ppid == self._thunk_pid and not self.root_process:
1645 proc = self.root_process = self.Process(
1646 self.blacklist, pid, self._initial_cwd)
1647 elif ppid in self._process_lookup:
1648 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd)
1649 self._process_lookup[ppid].children.append(proc)
1650 else:
1651 # Another process tree, ignore.
1652 return
1653 self._process_lookup[pid] = proc
1654 logging.debug(
1655 'New child: %s -> %d cwd:%s' %
1656 (ppid, pid, render(proc.initial_cwd)))
1657
1658 def handle_proc_exit(self, pid, _args):
1659 """Removes cwd."""
1660 if pid in self._process_lookup:
1661 # self._thunk_pid is not traced itself, and neither are processes from other trace runs.
1662 self._process_lookup[pid].cwd = None
1663
1664 def handle_execve(self, pid, args):
1665 """Sets the process' executable.
1666
1667 TODO(maruel): Read command line arguments. See
1668 https://discussions.apple.com/thread/1980539 for an example.
1669 https://gist.github.com/1242279
1670
1671 Will have to put the answer at http://stackoverflow.com/questions/7556249.
1672 :)
1673 """
1674 if pid not in self._process_lookup:
1675 # Another process tree, ignore.
1676 return
1677 match = self.RE_EXECVE.match(args)
1678 if not match:
1679 raise TracingFailure(
1680 'Failed to parse arguments: %r' % args,
1681 None, None, None)
1682 proc = self._process_lookup[pid]
1683 proc.executable = match.group(1)
1684 self._handle_file(pid, proc.executable, Results.File.READ)
1685 proc.command = self.process_escaped_arguments(match.group(3))
1686 if int(match.group(2)) != len(proc.command):
1687 raise TracingFailure(
1688 'Failed to parse execve() arguments: %s' % args,
1689 None, None, None)
1690
1691 def handle_chdir(self, pid, args):
1692 """Updates cwd."""
1693 if pid not in self._process_lookup:
1694 # Another process tree, ignore.
1695 return
1696 cwd = self.RE_CHDIR.match(args).group(1)
1697 if not cwd.startswith('/'):
1698 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd)
1699 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
1700 else:
1701 logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
1702 cwd2 = cwd
1703 self._process_lookup[pid].cwd = cwd2
1704
1705 def handle_open_nocancel(self, pid, args):
1706 """Redirects to handle_open()."""
1707 return self.handle_open(pid, args)
1708
1709 def handle_open(self, pid, args):
1710 if pid not in self._process_lookup:
1711 # Another process tree, ignore.
1712 return
1713 match = self.RE_OPEN.match(args)
1714 if not match:
1715 raise TracingFailure(
1716 'Failed to parse arguments: %s' % args,
1717 None, None, None)
1718 flag = int(match.group(2), 16)
1719 if self.O_DIRECTORY & flag == self.O_DIRECTORY:
1720 # Ignore directories.
1721 return
1722 self._handle_file(
1723 pid,
1724 match.group(1),
1725 Results.File.READ if not ((self.O_RDWR | self.O_WRONLY) & flag)
1726 else Results.File.WRITE)
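# A quick sketch of the decision above, with illustrative flag values
# (assuming the usual OSX <fcntl.h> constants, where O_WRONLY == 0x1,
# O_RDWR == 0x2 and O_DIRECTORY == 0x100000):
#   flag == 0x0     (O_RDONLY)                 -> Results.File.READ
#   flag == 0x601   (O_WRONLY|O_CREAT|O_TRUNC) -> Results.File.WRITE
#   flag & 0x100000 (O_DIRECTORY)              -> ignored entirely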
1727
1728 def handle_rename(self, pid, args):
1729 if pid not in self._process_lookup:
1730 # Another process tree, ignore.
1731 return
1732 match = self.RE_RENAME.match(args)
1733 if not match:
1734 raise TracingFailure(
1735 'Failed to parse arguments: %s' % args,
1736 None, None, None)
1737 self._handle_file(pid, match.group(1), Results.File.READ)
1738 self._handle_file(pid, match.group(2), Results.File.WRITE)
1739
1740 def _handle_file(self, pid, filepath, mode):
1741 if not filepath.startswith('/'):
1742 filepath = os.path.join(self._process_lookup[pid].cwd, filepath)
1743 # We can get '..' in the path.
1744 filepath = os.path.normpath(filepath)
1745 # Sadly, still need to filter out directories here;
1746 # saw open_nocancel(".", 0, 0) = 0 lines.
1747 if os.path.isdir(filepath):
1748 return
1749 self._process_lookup[pid].add_file(filepath, mode)
1750
1751 def handle_ftruncate(self, pid, args):
1752 """Just used as a signal to kill dtrace, ignoring."""
1753 pass
1754
1755 @staticmethod
1756 def _handle_ignored(pid, args):
1757 """Is called for all the event traces that are not handled."""
1758 raise NotImplementedError('Please implement me')
1759
1760 @staticmethod
1761 def process_escaped_arguments(text):
1762 """Extracts escaped arguments on a string and return the arguments as a
1763 list.
1764
1765 Implemented as an automaton.
1766
1767 Example:
1768 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the
1769 function will return ['python2.7', '-c', 'print("hi")']
1770 """
1771 if not text.endswith('\\0'):
1772 raise ValueError('String is not null terminated: %r' % text, text)
1773 text = text[:-2]
1774
1775 def unescape(x):
1776 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?."""
1777 out = []
1778 escaped = False
1779 for i in x:
1780 if i == '\\' and not escaped:
1781 escaped = True
1782 continue
1783 escaped = False
1784 out.append(i)
1785 return ''.join(out)
1786
1787 return [unescape(i) for i in text.split('\\001')]
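# A minimal worked example of the round-trip (hypothetical trace value): the
# D script joins argv with '\1', which shows up in the log as '\001', and the
# automaton above splits and unescapes it back into a list:
#   process_escaped_arguments('python2.7\\001-c\\001print(1)\\0')
#   -> ['python2.7', '-c', 'print(1)']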
1788
1789 class Tracer(ApiBase.Tracer):
1790 # pylint: disable=C0301
1791 #
1792 # To understand the following code, you'll want to take a look at:
1793 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
1794 # https://wikis.oracle.com/display/DTrace/Variables
1795 # http://docs.oracle.com/cd/E19205-01/820-4221/
1796 #
1797 # 0. Dump all the valid probes into a text file. It is important to
1798 # redirect into a file so you don't have to constantly 'sudo'.
1799 # $ sudo dtrace -l > probes.txt
1800 #
1801 # 1. Count the number of probes:
1802 # $ wc -l probes.txt
1803 # 81823 # On OSX 10.7, including 1 header line.
1804 #
1805 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
1806 # likes and skipping the header with NR>1:
1807 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
1808 # dtrace
1809 # fbt
1810 # io
1811 # ip
1812 # lockstat
1813 # mach_trap
1814 # proc
1815 # profile
1816 # sched
1817 # syscall
1818 # tcp
1819 # vminfo
1820 #
1821 # 3. List of valid probes:
1822 # $ grep syscall probes.txt | less
1823 # or use dtrace directly:
1824 # $ sudo dtrace -l -P syscall | less
1825 #
1826 # trackedpid is an associative array where its value can be 0, 1 or 2.
1827 # 0 is for untracked processes and is the default value for items not
1828 # in the associative array.
1829 # 1 is for tracked processes.
1830 # 2 is for the script created by create_subprocess_thunk() only. It is not
1831 # tracked itself but all its descendants are.
1832 #
1833 # The script will kill itself only once waiting_to_die == 1 and
1834 # current_processes == 0, that is, once getlogin() has been called and
1835 # all traced processes have exited.
1836 #
1837 # TODO(maruel): Use cacheable predicates. See
1838 # https://wikis.oracle.com/display/DTrace/Performance+Considerations
1839 D_CODE = """
1840 dtrace:::BEGIN {
1841 waiting_to_die = 0;
1842 current_processes = 0;
1843 logindex = 0;
1844 printf("%d %d %s_%s(\\"%s\\")\\n",
1845 logindex, PID, probeprov, probename, SCRIPT);
1846 logindex++;
1847 }
1848
1849 proc:::start /trackedpid[ppid]/ {
1850 trackedpid[pid] = 1;
1851 current_processes += 1;
1852 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1853 logindex, pid, probeprov, probename,
1854 ppid,
1855 execname,
1856 current_processes);
1857 logindex++;
1858 }
1859 /* Should use SCRIPT but there is no access to this variable at that
1860 * point. */
1861 proc:::start /ppid == PID && execname == "Python"/ {
1862 trackedpid[pid] = 2;
1863 current_processes += 1;
1864 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
1865 logindex, pid, probeprov, probename,
1866 ppid,
1867 execname,
1868 current_processes);
1869 logindex++;
1870 }
1871 proc:::exit /trackedpid[pid] &&
1872 current_processes == 1 &&
1873 waiting_to_die == 1/ {
1874 trackedpid[pid] = 0;
1875 current_processes -= 1;
1876 printf("%d %d %s_%s(%d)\\n",
1877 logindex, pid, probeprov, probename,
1878 current_processes);
1879 logindex++;
1880 exit(0);
1881 }
1882 proc:::exit /trackedpid[pid]/ {
1883 trackedpid[pid] = 0;
1884 current_processes -= 1;
1885 printf("%d %d %s_%s(%d)\\n",
1886 logindex, pid, probeprov, probename,
1887 current_processes);
1888 logindex++;
1889 }
1890
1891 /* Use an arcane function to detect when we need to die */
1892 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
1893 waiting_to_die = 1;
1894 printf("%d %d %s()\\n", logindex, pid, probefunc);
1895 logindex++;
1896 }
1897 syscall::ftruncate:entry /
1898 pid == PID && arg0 == FILE_ID && current_processes == 0/ {
1899 exit(0);
1900 }
1901
1902 syscall::open*:entry /trackedpid[pid] == 1/ {
1903 self->open_arg0 = arg0;
1904 self->open_arg1 = arg1;
1905 self->open_arg2 = arg2;
1906 }
1907 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
1908 this->open_arg0 = copyinstr(self->open_arg0);
1909 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
1910 logindex, pid, probefunc,
1911 this->open_arg0,
1912 self->open_arg1,
1913 self->open_arg2);
1914 logindex++;
1915 this->open_arg0 = 0;
1916 }
1917 syscall::open*:return /trackedpid[pid] == 1/ {
1918 self->open_arg0 = 0;
1919 self->open_arg1 = 0;
1920 self->open_arg2 = 0;
1921 }
1922
1923 syscall::rename:entry /trackedpid[pid] == 1/ {
1924 self->rename_arg0 = arg0;
1925 self->rename_arg1 = arg1;
1926 }
1927 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
1928 this->rename_arg0 = copyinstr(self->rename_arg0);
1929 this->rename_arg1 = copyinstr(self->rename_arg1);
1930 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
1931 logindex, pid, probefunc,
1932 this->rename_arg0,
1933 this->rename_arg1);
1934 logindex++;
1935 this->rename_arg0 = 0;
1936 this->rename_arg1 = 0;
1937 }
1938 syscall::rename:return /trackedpid[pid] == 1/ {
1939 self->rename_arg0 = 0;
1940 self->rename_arg1 = 0;
1941 }
1942
1943 /* Track chdir; it's painful because it only receives relative paths.
1944 */
1945 syscall::chdir:entry /trackedpid[pid] == 1/ {
1946 self->chdir_arg0 = arg0;
1947 }
1948 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
1949 this->chdir_arg0 = copyinstr(self->chdir_arg0);
1950 printf("%d %d %s(\\"%s\\")\\n",
1951 logindex, pid, probefunc,
1952 this->chdir_arg0);
1953 logindex++;
1954 this->chdir_arg0 = 0;
1955 }
1956 syscall::chdir:return /trackedpid[pid] == 1/ {
1957 self->chdir_arg0 = 0;
1958 }
1959 """
1960
1961 # execve-specific code, tends to throw a lot of exceptions.
1962 D_CODE_EXECVE = """
1963 /* Finally what we care about! */
1964 syscall::exec*:entry /trackedpid[pid]/ {
1965 self->exec_arg0 = copyinstr(arg0);
1966 /* Incrementally probe for a NULL in the argv parameter of execve() to
1967 * figure out argc. */
1968 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
1969 * found. */
1970 self->exec_argc = 0;
1971 /* Probe for argc==1 */
1972 this->exec_argv = (user_addr_t*)copyin(
1973 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1974 self->exec_argc = this->exec_argv[self->exec_argc] ?
1975 (self->exec_argc + 1) : self->exec_argc;
1976
1977 /* Probe for argc==2 */
1978 this->exec_argv = (user_addr_t*)copyin(
1979 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1980 self->exec_argc = this->exec_argv[self->exec_argc] ?
1981 (self->exec_argc + 1) : self->exec_argc;
1982
1983 /* Probe for argc==3 */
1984 this->exec_argv = (user_addr_t*)copyin(
1985 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1986 self->exec_argc = this->exec_argv[self->exec_argc] ?
1987 (self->exec_argc + 1) : self->exec_argc;
1988
1989 /* Probe for argc==4 */
1990 this->exec_argv = (user_addr_t*)copyin(
1991 arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
1992 self->exec_argc = this->exec_argv[self->exec_argc] ?
1993 (self->exec_argc + 1) : self->exec_argc;
1994
1995 /* Copy the input strings since there is no guarantee they'll be
1996 * present after the call completes. */
1997 self->exec_argv0 = (self->exec_argc > 0) ?
1998 copyinstr(this->exec_argv[0]) : "";
1999 self->exec_argv1 = (self->exec_argc > 1) ?
2000 copyinstr(this->exec_argv[1]) : "";
2001 self->exec_argv2 = (self->exec_argc > 2) ?
2002 copyinstr(this->exec_argv[2]) : "";
2003 self->exec_argv3 = (self->exec_argc > 3) ?
2004 copyinstr(this->exec_argv[3]) : "";
2005 this->exec_argv = 0;
2006 }
2007 syscall::exec*:return /trackedpid[pid] && errno == 0/ {
2008 /* We need to join strings here, as using multiple printf() would
2009 * cause tearing when multiple threads/processes are traced.
2010 * Since it is impossible to escape a string and join it to another one,
2011 * like sprintf("%s%S", previous, more), use hackery.
2012 * Each of the elements is separated with a \\1. \\0 cannot be used because
2013 * it is simply ignored. This will conflict with any program putting a
2014 * \\1 in their execve() string but this should be "rare enough" */
2015 this->args = "";
2016 /* Process exec_argv[0] */
2017 this->args = strjoin(
2018 this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");
2019
2020 /* Process exec_argv[1] */
2021 this->args = strjoin(
2022 this->args, (self->exec_argc > 1) ? "\\1" : "");
2023 this->args = strjoin(
2024 this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");
2025
2026 /* Process exec_argv[2] */
2027 this->args = strjoin(
2028 this->args, (self->exec_argc > 2) ? "\\1" : "");
2029 this->args = strjoin(
2030 this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");
2031
2032 /* Process exec_argv[3] */
2033 this->args = strjoin(
2034 this->args, (self->exec_argc > 3) ? "\\1" : "");
2035 this->args = strjoin(
2036 this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");
2037
2038 /* Prints self->exec_argc to permit verifying the internal
2039 * consistency since this code is quite fishy. */
2040 printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
2041 logindex, pid, probefunc,
2042 self->exec_arg0,
2043 self->exec_argc,
2044 this->args);
2045 logindex++;
2046 this->args = 0;
2047 }
2048 syscall::exec*:return /trackedpid[pid]/ {
2049 self->exec_arg0 = 0;
2050 self->exec_argc = 0;
2051 self->exec_argv0 = 0;
2052 self->exec_argv1 = 0;
2053 self->exec_argv2 = 0;
2054 self->exec_argv3 = 0;
2055 }
2056 """
2057
2058 # Code currently not used.
2059 D_EXTRANEOUS = """
2060 /* This is a good learning experience, since it traces a lot of things
2061 * related to the process and child processes.
2062 * Warning: it generates a gigantic log. For example, tracing
2063 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
2064 * several minutes to execute.
2065 */
2066 /*
2067 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2068 printf("%d %d %s_%s() = %d\\n",
2069 logindex, pid, probeprov, probefunc, errno);
2070 logindex++;
2071 }
2072 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2073 printf("%d %d %s_%s() = %d\\n",
2074 logindex, pid, probeprov, probefunc, errno);
2075 logindex++;
2076 }
2077 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2078 printf("%d %d %s_%s() = %d\\n",
2079 logindex, pid, probeprov, probefunc, errno);
2080 logindex++;
2081 }
2082 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2083 printf("%d %d %s_%s() = %d\\n",
2084 logindex, pid, probeprov, probefunc, errno);
2085 logindex++;
2086 }
2087 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
2088 printf("%d %d %s_%s() = %d\\n",
2089 logindex, pid, probeprov, probefunc, errno);
2090 logindex++;
2091 }
2092 */
2093 /* TODO(maruel): *stat* functions and friends
2094 syscall::access:return,
2095 syscall::chdir:return,
2096 syscall::chflags:return,
2097 syscall::chown:return,
2098 syscall::chroot:return,
2099 syscall::getattrlist:return,
2100 syscall::getxattr:return,
2101 syscall::lchown:return,
2102 syscall::lstat64:return,
2103 syscall::lstat:return,
2104 syscall::mkdir:return,
2105 syscall::pathconf:return,
2106 syscall::readlink:return,
2107 syscall::removexattr:return,
2108 syscall::setxattr:return,
2109 syscall::stat64:return,
2110 syscall::stat:return,
2111 syscall::truncate:return,
2112 syscall::unlink:return,
2113 syscall::utimes:return,
2114 */
2115 """
2116
2117 def __init__(self, logname, use_sudo):
2118 """Starts the log collection with dtrace.
2119
2120 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
2121 this needs to wait for dtrace to be "warmed up".
2122 """
2123 super(Dtrace.Tracer, self).__init__(logname)
2124 # This script is used as a signal to figure out the root process.
2125 self._signal_script = create_subprocess_thunk()
2126 self._scripts_to_cleanup.append(self._signal_script)
2127 # This unique dummy temp file is used to signal the dtrace script that it
2128 # should stop as soon as all the child processes are done. A bit hackish
2129 # but works fine enough.
2130 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
2131 prefix='trace_signal_file')
2132
2133 dtrace_path = '/usr/sbin/dtrace'
2134 if not os.path.isfile(dtrace_path):
2135 dtrace_path = 'dtrace'
2136 elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
2137 # No need to sudo. For those following at home, don't do that.
2138 use_sudo = False
2139
2140 # Note: do not use the -p flag. It's useless if the initial process quits
2141 # too fast, resulting in missing traces from the grand-children. The D
2142 # code manages the dtrace lifetime itself.
2143 trace_cmd = [
2144 dtrace_path,
2145 # Use a larger buffer if getting 'out of scratch space' errors.
2146 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
2147 '-b', '10m',
2148 '-x', 'dynvarsize=10m',
2149 #'-x', 'dtrace_global_maxsize=1m',
2150 '-x', 'evaltime=exec',
2151 '-o', '/dev/stderr',
2152 '-q',
2153 '-n', self._get_dtrace_code(),
2154 ]
2155 if use_sudo is not False:
2156 trace_cmd.insert(0, 'sudo')
2157
2158 with open(self._logname + '.log', 'wb') as logfile:
2159 self._dtrace = subprocess.Popen(
2160 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
2161 logging.debug('Started dtrace pid: %d' % self._dtrace.pid)
2162
2163 # Reads until one line is printed, which signifies dtrace is up and ready.
2164 with open(self._logname + '.log', 'rb') as logfile:
2165 while 'dtrace_BEGIN' not in logfile.readline():
2166 if self._dtrace.poll() is not None:
2167 # Do a busy wait. :/
2168 break
2169 logging.debug('dtrace started')
2170
2171 def _get_dtrace_code(self):
2172 """Setups the D code to implement child process tracking.
2173
2174 Injects the cookie in the script so it knows when to stop.
2175
2176 The script will detect any instance of the script created with
2177 create_subprocess_thunk() and will start tracing it.
2178 """
2179 out = (
2180 'inline int PID = %d;\n'
2181 'inline string SCRIPT = "%s";\n'
2182 'inline int FILE_ID = %d;\n'
2183 '\n'
2184 '%s') % (
2185 os.getpid(),
2186 self._signal_script,
2187 self._dummy_file_id,
2188 self.D_CODE)
2189 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1':
2190 # Do not enable by default since it tends to spew dtrace: error lines
2191 # because the execve() parameters are not in valid memory at the time of
2192 # logging.
2193 # TODO(maruel): Find a way to make this reliable since it's useful but
2194 # only works in limited/trivial use cases for now.
2195 out += self.D_CODE_EXECVE
2196 return out
2197
2198 def trace(self, cmd, cwd, tracename, output):
2199 """Runs dtrace on an executable.
2200
2201 Since dtruss is broken when it starts the process itself or when tracing
2202 child processes, this code starts a wrapper process
2203 generated with create_subprocess_thunk(), which in turn starts the
2204 executable to trace.
2205 """
2206 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2207 assert os.path.isabs(cmd[0]), cmd[0]
2208 assert os.path.isabs(cwd), cwd
2209 assert os.path.normpath(cwd) == cwd, cwd
2210 with self._lock:
2211 if not self._initialized:
2212 raise TracingFailure(
2213 'Called Tracer.trace() on an uninitialized object',
2214 None, None, None, tracename)
2215 assert tracename not in (i['trace'] for i in self._traces)
2216
2217 # Starts the script wrapper to start the child process. This signals the
2218 # dtrace script that this process is to be traced.
2219 stdout = stderr = None
2220 if output:
2221 stdout = subprocess.PIPE
2222 stderr = subprocess.STDOUT
2223 child_cmd = [
2224 sys.executable,
2225 self._signal_script,
2226 tracename,
2227 ]
2228 # Call a dummy function so that dtrace knows I'm about to launch a process
2229 # that needs to be traced.
2230 # Yummy.
2231 child = subprocess.Popen(
2232 child_cmd + tools.fix_python_path(cmd),
2233 stdin=subprocess.PIPE,
2234 stdout=stdout,
2235 stderr=stderr,
2236 cwd=cwd)
2237 logging.debug('Started child pid: %d' % child.pid)
2238
2239 out = child.communicate()[0]
2240 # This doesn't mean tracing is done; one of the grand-child processes may
2241 # still be alive. They will be tracked by the dtrace script.
2242
2243 with self._lock:
2244 assert tracename not in (i['trace'] for i in self._traces)
2245 self._traces.append(
2246 {
2247 'cmd': cmd,
2248 'cwd': cwd,
2249 'pid': child.pid,
2250 'output': out,
2251 'trace': tracename,
2252 })
2253 return child.returncode, out
2254
2255 def close(self, timeout=None):
2256 """Terminates dtrace."""
2257 logging.debug('close(%s)' % timeout)
2258 try:
2259 try:
2260 super(Dtrace.Tracer, self).close(timeout)
2261 # Signal dtrace that it should stop now.
2262 # ftruncate doesn't exist on Windows.
2263 os.ftruncate(self._dummy_file_id, 0) # pylint: disable=E1101
2264 if timeout:
2265 start = time.time()
2266 # Use polling. :/
2267 while (self._dtrace.poll() is None and
2268 (time.time() - start) < timeout):
2269 time.sleep(0.1)
2270 self._dtrace.kill()
2271 self._dtrace.wait()
2272 finally:
2273 # Make sure to kill it in any case.
2274 if self._dtrace.poll() is None:
2275 try:
2276 self._dtrace.kill()
2277 self._dtrace.wait()
2278 except OSError:
2279 pass
2280
2281 if self._dtrace.returncode != 0:
2282 # Warn about any dtrace failure but basically ignore it.
2283 print 'dtrace failure: %s' % self._dtrace.returncode
2284 finally:
2285 os.close(self._dummy_file_id)
2286 os.remove(self._dummy_file_name)
2287
2288 def post_process_log(self):
2289 """Sorts the log back in order when each call occured.
2290
2291 dtrace doesn't save the buffer in strict order since it keeps one buffer
2292 per CPU.
2293 """
2294 super(Dtrace.Tracer, self).post_process_log()
2295 logname = self._logname + '.log'
2296 with open(logname, 'rb') as logfile:
2297 lines = [l for l in logfile if l.strip()]
2298 errors = [l for l in lines if l.startswith('dtrace:')]
2299 if errors:
2300 raise TracingFailure(
2301 'Found errors in the trace: %s' % '\n'.join(errors),
2302 None, None, None, logname)
2303 try:
2304 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
2305 except ValueError:
2306 raise TracingFailure(
2307 'Found errors in the trace: %s' % '\n'.join(
2308 l for l in lines if l.split(' ', 1)[0].isdigit()),
2309 None, None, None, logname)
2310 with open(logname, 'wb') as logfile:
2311 logfile.write(''.join(lines))
2312
2313 def __init__(self, use_sudo=None):
2314 super(Dtrace, self).__init__()
2315 self.use_sudo = use_sudo
2316
2317 def get_tracer(self, logname):
2318 return self.Tracer(logname, self.use_sudo)
2319
2320 @staticmethod
2321 def clean_trace(logname):
2322 for ext in ('', '.log'):
2323 if os.path.isfile(logname + ext):
2324 os.remove(logname + ext)
2325
2326 @classmethod
2327 def parse_log(cls, logname, blacklist, trace_name):
2328 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
2329 assert os.path.isabs(logname)
2330
2331 def blacklist_more(filepath):
2332 # All the HFS metadata is in the form /.vol/...
2333 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)
2334
2335 data = read_json(logname)
2336 out = []
2337 for item in data['traces']:
2338 if trace_name and item['trace'] != trace_name:
2339 continue
2340 result = {
2341 'output': item['output'],
2342 'trace': item['trace'],
2343 }
2344 try:
2345 context = cls.Context(blacklist_more, item['pid'], item['cwd'])
2346 # It's fine to assume the file is UTF-8: OSX enforces the file names to
2347 # be valid UTF-8 and we control the log output.
2348 for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
2349 context.on_line(line)
2350 result['results'] = context.to_results()
2351 except TracingFailure:
2352 result['exception'] = sys.exc_info()
2353 out.append(result)
2354 return out
2355
2356
2357 class LogmanTrace(ApiBase):
2358 """Uses the native Windows ETW based tracing functionality to trace a child
2359 process.
2360
2361 Caveat: this implementation doesn't track cwd or initial_cwd. This is because
2362 the Windows Kernel doesn't have a concept of 'current working directory' at
2363 all. A Win32 process has a map of current directories, one per drive letter
2364 and it is managed by the user mode kernel32.dll. In kernel, a file is always
2365 opened relative to another file_object or as an absolute path. All the current
2366 working directory logic is done in user mode.
2367 """
2368 class Context(ApiBase.Context):
2369 """Processes a ETW log line and keeps the list of existent and non
2370 existent files accessed.
2371
2372 Ignores directories.
2373 """
2374 # These indexes are for the stripped version in json.
2375 EVENT_NAME = 0
2376 TYPE = 1
2377 PID = 2
2378 TID = 3
2379 PROCESSOR_ID = 4
2380 TIMESTAMP = 5
2381 USER_DATA = 6
2382
2383 class Process(ApiBase.Context.Process):
2384 def __init__(self, *args):
2385 super(LogmanTrace.Context.Process, self).__init__(*args)
2386 # Handle file objects that succeeded.
2387 self.file_objects = {}
2388
2389 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
2390 logging.info(
2391 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
2392 thunk_cmd)
2393 super(LogmanTrace.Context, self).__init__(blacklist)
2394 self._drive_map = file_path.DosDriveMap()
2395 # Threads mapping to the corresponding process id.
2396 self._threads_active = {}
2397 # Process ID of the tracer, e.g. the temporary script created by
2398 # create_subprocess_thunk(). This is tricky because the process id may
2399 # have been reused.
2400 self._thunk_pid = thunk_pid
2401 self._thunk_cmd = thunk_cmd
2402 self._trace_name = trace_name
2403 self._line_number = 0
2404 self._thunk_process = None
2405
2406 def on_line(self, line):
2407 """Processes a json Event line."""
2408 self._line_number += 1
2409 try:
2410 # By Opcode
2411 handler = getattr(
2412 self,
2413 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
2414 None)
2415 if not handler:
2416 raise TracingFailure(
2417 'Unexpected event %s_%s' % (
2418 line[self.EVENT_NAME], line[self.TYPE]),
2419 None, None, None)
2420 handler(line)
2421 except TracingFailure, e:
2422 # Hack in the values since the handler could be a static function.
2423 e.pid = line[self.PID]
2424 e.line = line
2425 e.line_number = self._line_number
2426 # Re-raise the modified exception.
2427 raise
2428 except (KeyError, NotImplementedError, ValueError), e:
2429 raise TracingFailure(
2430 'Trace generated a %s exception: %s' % (
2431 e.__class__.__name__, str(e)),
2432 line[self.PID],
2433 self._line_number,
2434 line,
2435 e)
2436
2437 def to_results(self):
2438 if not self.root_process:
2439 raise TracingFailure(
2440 'Failed to detect the initial process %d' % self._thunk_pid,
2441 None, None, None)
2442 process = self.root_process.to_results_process()
2443 return Results(process)
2444
2445 def _thread_to_process(self, tid):
2446 """Finds the process from the thread id."""
2447 tid = int(tid, 16)
2448 pid = self._threads_active.get(tid)
2449 if not pid or not self._process_lookup.get(pid):
2450 return
2451 return self._process_lookup[pid]
2452
2453 @classmethod
2454 def handle_EventTrace_Header(cls, line):
2455 """Verifies no event was dropped, e.g. no buffer overrun occured."""
2456 BUFFER_SIZE = cls.USER_DATA
2457 #VERSION = cls.USER_DATA + 1
2458 #PROVIDER_VERSION = cls.USER_DATA + 2
2459 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
2460 #END_TIME = cls.USER_DATA + 4
2461 #TIMER_RESOLUTION = cls.USER_DATA + 5
2462 #MAX_FILE_SIZE = cls.USER_DATA + 6
2463 #LOG_FILE_MODE = cls.USER_DATA + 7
2464 #BUFFERS_WRITTEN = cls.USER_DATA + 8
2465 #START_BUFFERS = cls.USER_DATA + 9
2466 #POINTER_SIZE = cls.USER_DATA + 10
2467 EVENTS_LOST = cls.USER_DATA + 11
2468 #CPU_SPEED = cls.USER_DATA + 12
2469 #LOGGER_NAME = cls.USER_DATA + 13
2470 #LOG_FILE_NAME = cls.USER_DATA + 14
2471 #BOOT_TIME = cls.USER_DATA + 15
2472 #PERF_FREQ = cls.USER_DATA + 16
2473 #START_TIME = cls.USER_DATA + 17
2474 #RESERVED_FLAGS = cls.USER_DATA + 18
2475 #BUFFERS_LOST = cls.USER_DATA + 19
2476 #SESSION_NAME_STRING = cls.USER_DATA + 20
2477 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
2478 if line[EVENTS_LOST] != '0':
2479 raise TracingFailure(
2480 ( '%s events were lost during trace, please increase the buffer '
2481 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
2482 None, None, None)
2483
2484 def handle_FileIo_Cleanup(self, line):
2485 """General wisdom: if a file is closed, it's because it was opened.
2486
2487 Note that FileIo_Close is not used since if a file was opened properly but
2488 not closed before the process exits, only Cleanup will be logged.
2489 """
2490 #IRP = self.USER_DATA
2491 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2492 FILE_OBJECT = self.USER_DATA + 2
2493 #FILE_KEY = self.USER_DATA + 3
2494 proc = self._thread_to_process(line[TTID])
2495 if not proc:
2496 # Not a process we care about.
2497 return
2498 file_object = line[FILE_OBJECT]
2499 if file_object in proc.file_objects:
2500 filepath, access_type = proc.file_objects.pop(file_object)
2501 proc.add_file(filepath, access_type)
2502
2503 def handle_FileIo_Create(self, line):
2504 """Handles a file open.
2505
2506 All FileIo events are described at
2507 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
2508 for some value of 'description'.
2509
2510 " (..) process and thread id values of the IO events (..) are not valid "
2511 http://msdn.microsoft.com/magazine/ee358703.aspx
2512
2513 The FileIo.Create event doesn't say whether the CreateFile() call
2514 succeeded, so keep track of the file_object and check that it is
2515 eventually closed with FileIo_Cleanup.
2516 """
2517 #IRP = self.USER_DATA
2518 TTID = self.USER_DATA + 1 # Thread ID, that's what we want.
2519 FILE_OBJECT = self.USER_DATA + 2
2520 #CREATE_OPTIONS = self.USER_DATA + 3
2521 #FILE_ATTRIBUTES = self.USER_DATA + 4
2522 #SHARE_ACCESS = self.USER_DATA + 5
2523 OPEN_PATH = self.USER_DATA + 6
2524
2525 proc = self._thread_to_process(line[TTID])
2526 if not proc:
2527 # Not a process we care about.
2528 return
2529
2530 raw_path = line[OPEN_PATH]
2531 # Ignore directories and bare drive right away.
2532 if raw_path.endswith(os.path.sep):
2533 return
2534 filepath = self._drive_map.to_win32(raw_path)
2535 # Ignore bare drive right away. Some may still fall through with format
2536 # like '\\?\X:'
2537 if len(filepath) == 2:
2538 return
2539 file_object = line[FILE_OBJECT]
2540 if os.path.isdir(filepath):
2541 # There is no O_DIRECTORY equivalent on Windows. The closest is
2542 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
2543 # simply discard directories as they are found.
2544 return
2545 # Override any stale file object.
2546 # TODO(maruel): Figure out a way to detect if the file was opened for
2547 # reading or writing. Sadly CREATE_OPTIONS doesn't seem to be of any help
2548 # here. For now mark as None to make it clear we have no idea what it is
2549 # about.
2550 proc.file_objects[file_object] = (filepath, None)
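# Sketch of the Create/Cleanup pairing described above (values illustrative):
# a FileIo_Create stores file_objects[file_object] = (u'c:\\src\\foo.h', None);
# when a FileIo_Cleanup later arrives for the same file_object, the entry is
# popped and add_file() records the access. Entries that never see a Cleanup
# are simply never reported.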
2551
2552 def handle_FileIo_Rename(self, line):
2553 # TODO(maruel): Handle?
2554 pass
2555
2556 def handle_Process_End(self, line):
2557 pid = line[self.PID]
2558 if self._process_lookup.get(pid):
2559 logging.info('Terminated: %d' % pid)
2560 self._process_lookup[pid] = None
2561 else:
2562 logging.debug('Terminated: %d' % pid)
2563 if self._thunk_process and self._thunk_process.pid == pid:
2564 self._thunk_process = None
2565
2566 def handle_Process_Start(self, line):
2567 """Handles a new child process started by PID."""
2568 #UNIQUE_PROCESS_KEY = self.USER_DATA
2569 PROCESS_ID = self.USER_DATA + 1
2570 #PARENT_PID = self.USER_DATA + 2
2571 #SESSION_ID = self.USER_DATA + 3
2572 #EXIT_STATUS = self.USER_DATA + 4
2573 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
2574 #USER_SID = self.USER_DATA + 6
2575 IMAGE_FILE_NAME = self.USER_DATA + 7
2576 COMMAND_LINE = self.USER_DATA + 8
2577
2578 ppid = line[self.PID]
2579 pid = int(line[PROCESS_ID], 16)
2580 command_line = CommandLineToArgvW(line[COMMAND_LINE])
2581 logging.debug(
2582 'New process %d->%d (%s) %s' %
2583 (ppid, pid, line[IMAGE_FILE_NAME], command_line))
2584
2585 if pid == self._thunk_pid:
2586 # Need to ignore processes we don't know about because the log is
2587 # system-wide. self._thunk_pid shall start only one process.
2588 # This is tricky though because Windows *loves* to reuse process ids and
2589 # it often happens that the process ID of the thunk script created by
2590 # create_subprocess_thunk() is reused. So just detecting the pid here is
2591 # not sufficient, we must confirm the command line.
2592 if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
2593 logging.info(
2594 'Ignoring duplicate pid %d for %s: %s while searching for %s',
2595 pid, self._trace_name, command_line, self._thunk_cmd)
2596 return
2597
2598 # TODO(maruel): The check is quite weak. Add the thunk path.
2599 if self._thunk_process:
2600 raise TracingFailure(
2601 ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
2602 'already set') % (self._thunk_pid, self._thunk_process.pid),
2603 None, None, None)
2604 proc = self.Process(self.blacklist, pid, None)
2605 self._thunk_process = proc
2606 return
2607 elif ppid == self._thunk_pid and self._thunk_process:
2608 proc = self.Process(self.blacklist, pid, None)
2609 self.root_process = proc
2610 ppid = None
2611 elif self._process_lookup.get(ppid):
2612 proc = self.Process(self.blacklist, pid, None)
2613 self._process_lookup[ppid].children.append(proc)
2614 else:
2615 # Ignore
2616 return
2617 self._process_lookup[pid] = proc
2618
2619 proc.command = command_line
2620 proc.executable = line[IMAGE_FILE_NAME]
2621 # proc.command[0] may be the absolute path of 'executable' but it may be
2622 # anything else too. If it happens that command[0] ends with executable,
2623 # use it, otherwise defaults to the base name.
2624 cmd0 = proc.command[0].lower()
2625 if not cmd0.endswith('.exe'):
2626 # TODO(maruel): That's not strictly true either.
2627 cmd0 += '.exe'
2628 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
2629 # Fix the path.
2630 cmd0 = cmd0.replace('/', os.path.sep)
2631 cmd0 = os.path.normpath(cmd0)
2632 proc.executable = file_path.get_native_path_case(cmd0)
2633 logging.info(
2634 'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2635
2636 def handle_Thread_End(self, line):
2637 """Has the same parameters as Thread_Start."""
2638 tid = int(line[self.TID], 16)
2639 self._threads_active.pop(tid, None)
2640
2641 def handle_Thread_Start(self, line):
2642 """Handles a new thread created.
2643
2644 Do not use self.PID here since a process' initial thread is created by
2645 the parent process.
2646 """
2647 PROCESS_ID = self.USER_DATA
2648 TTHREAD_ID = self.USER_DATA + 1
2649 #STACK_BASE = self.USER_DATA + 2
2650 #STACK_LIMIT = self.USER_DATA + 3
2651 #USER_STACK_BASE = self.USER_DATA + 4
2652 #USER_STACK_LIMIT = self.USER_DATA + 5
2653 #AFFINITY = self.USER_DATA + 6
2654 #WIN32_START_ADDR = self.USER_DATA + 7
2655 #TEB_BASE = self.USER_DATA + 8
2656 #SUB_PROCESS_TAG = self.USER_DATA + 9
2657 #BASE_PRIORITY = self.USER_DATA + 10
2658 #PAGE_PRIORITY = self.USER_DATA + 11
2659 #IO_PRIORITY = self.USER_DATA + 12
2660 #THREAD_FLAGS = self.USER_DATA + 13
2661 # Do not use self.PID here since a process' initial thread is created by
2662 # the parent process.
2663 pid = int(line[PROCESS_ID], 16)
2664 tid = int(line[TTHREAD_ID], 16)
2665 logging.debug('New thread pid:%d, tid:%d' % (pid, tid))
2666 self._threads_active[tid] = pid
2667
2668 @classmethod
2669 def supported_events(cls):
2670 """Returns all the procesed events."""
2671 out = []
2672 for member in dir(cls):
2673 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member)
2674 if match:
2675 out.append(match.groups())
2676 return out
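# For the handlers defined in this class, supported_events() yields pairs
# such as ('EventTrace', 'Header'), ('FileIo', 'Create'), ('FileIo', 'Cleanup'),
# ('Process', 'Start') and ('Thread', 'Start'); _trim_log() below uses this
# list to drop every other event early.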
2677
2678 class Tracer(ApiBase.Tracer):
2679 # The basic headers.
2680 EXPECTED_HEADER = [
2681 u'Event Name',
2682 u'Type',
2683 u'Event ID',
2684 u'Version',
2685 u'Channel',
2686 u'Level', # 5
2687 u'Opcode',
2688 u'Task',
2689 u'Keyword',
2690 u'PID',
2691 u'TID', # 10
2692 u'Processor Number',
2693 u'Instance ID',
2694 u'Parent Instance ID',
2695 u'Activity ID',
2696 u'Related Activity ID', # 15
2697 u'Clock-Time',
2698 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them
2699 u'User(ms)', # pretty much useless.
2700 u'User Data', # Extra arguments that are event-specific.
2701 ]
2702 # Only the useful headers common to all entries are listed there. Any column
2703 # at 19 or higher is dependent on the specific event.
2704 EVENT_NAME = 0
2705 TYPE = 1
2706 PID = 9
2707 TID = 10
2708 PROCESSOR_ID = 11
2709 TIMESTAMP = 16
2710 NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
2711 USER_DATA = 19
2712
2713 class CsvReader(object):
2714 """CSV reader that reads files generated by tracerpt.exe.
2715
2716 csv.reader() fails to read them properly; it mangles file names quoted
2717 with "" that contain a comma.
2718 """
2719 # 0. Had a ',' (or whitespace following one); the next char should be
2720 # ' ', '"', the start of a string, or ',' for an empty field.
2721 ( HAD_DELIMITER,
2722 # 1. Processing an unquoted field up to ','.
2723 IN_STR,
2724 # 2. Processing a new field starting with '"'.
2725 STARTING_STR_QUOTED,
2726 # 3. Second quote in a row at the start of a field. It could be either
2727 # '""foo""' or '""'. Who the hell thought it was a great idea to use
2728 # the same character for delimiting and escaping?
2729 STARTING_SECOND_QUOTE,
2730 # 4. A quote inside a quoted string where the previous character was
2731 # not a quote, so the string is not empty. Can be either: end of a
2732 # quoted string (a delimiter) or a quote escape. The next char must be
2733 # either '"' or ','.
2734 HAD_QUOTE_IN_QUOTED,
2735 # 5. Second quote inside a quoted string.
2736 HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
2737 # 6. Processing a field that started with '"'.
2738 IN_STR_QUOTED) = range(7)
2739
2740 def __init__(self, f):
2741 self.f = f
2742
2743 def __iter__(self):
2744 return self
2745
2746 def next(self):
2747 """Splits the line in fields."""
2748 line = self.f.readline()
2749 if not line:
2750 raise StopIteration()
2751 line = line.strip()
2752 fields = []
2753 state = self.HAD_DELIMITER
2754 for i, c in enumerate(line):
2755 if state == self.HAD_DELIMITER:
2756 if c == ',':
2757 # Empty field.
2758 fields.append('')
2759 elif c == ' ':
2760 # Ignore initial whitespaces
2761 pass
2762 elif c == '"':
2763 state = self.STARTING_STR_QUOTED
2764 fields.append('')
2765 else:
2766 # Start of a new field.
2767 state = self.IN_STR
2768 fields.append(c)
2769
2770 elif state == self.IN_STR:
2771 # Do not accept quote inside unquoted field.
2772 assert c != '"', (i, c, line, fields)
2773 if c == ',':
2774 fields[-1] = fields[-1].strip()
2775 state = self.HAD_DELIMITER
2776 else:
2777 fields[-1] = fields[-1] + c
2778
2779 elif state == self.STARTING_STR_QUOTED:
2780 if c == '"':
2781 # Do not store the character yet.
2782 state = self.STARTING_SECOND_QUOTE
2783 else:
2784 state = self.IN_STR_QUOTED
2785 fields[-1] = fields[-1] + c
2786
2787 elif state == self.STARTING_SECOND_QUOTE:
2788 if c == ',':
2789 # It was an empty field. '""' == ''.
2790 state = self.HAD_DELIMITER
2791 else:
2792 fields[-1] = fields[-1] + '"' + c
2793 state = self.IN_STR_QUOTED
2794
2795 elif state == self.HAD_QUOTE_IN_QUOTED:
2796 if c == ',':
2797 # End of the string.
2798 state = self.HAD_DELIMITER
2799 elif c == '"':
2800 state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
2801 else:
2802 # The previous double-quote was just an unescaped quote.
2803 fields[-1] = fields[-1] + '"' + c
2804 state = self.IN_STR_QUOTED
2805
2806 elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2807 if c == ',':
2808 # End of the string.
2809 state = self.HAD_DELIMITER
2810 fields[-1] = fields[-1] + '"'
2811 else:
2812 # That's just how the logger rolls. Revert back to appending the
2813 # char and "guess" it was a quote in a double-quoted string.
2814 state = self.IN_STR_QUOTED
2815 fields[-1] = fields[-1] + '"' + c
2816
2817 elif state == self.IN_STR_QUOTED:
2818 if c == '"':
2819 # Could be a delimiter or an escape.
2820 state = self.HAD_QUOTE_IN_QUOTED
2821 else:
2822 fields[-1] = fields[-1] + c
2823
2824 if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
2825 fields[-1] = fields[-1] + '"'
2826 else:
2827 assert state in (
2828 # Terminated with a normal field.
2829 self.IN_STR,
2830 # Terminated with an empty field.
2831 self.STARTING_SECOND_QUOTE,
2832 # Terminated with a normal quoted field.
2833 self.HAD_QUOTE_IN_QUOTED), (
2834 line, state, fields)
2835 return fields
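# A worked example of the state machine above, on a hypothetical tracerpt
# line:
#   FileIo, Create, 0, "c:\tmp\a, b.txt", 42
# parses to ['FileIo', 'Create', '0', 'c:\tmp\a, b.txt', '42']; the comma
# inside the quoted path stays in its field, which is exactly what
# csv.reader() gets wrong on these logs.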
2836
2837 def __init__(self, logname):
2838 """Starts the log collection.
2839
2840 Requires administrative access. logman.exe is synchronous so no need for a
2841 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
2842 instead. The GUID constant name is SystemTraceControlGuid. Lovely.
2843
2844 One can get the list of potentially interesting providers with:
2845 "logman query providers | findstr /i file"
2846 """
2847 super(LogmanTrace.Tracer, self).__init__(logname)
2848 self._signal_script = create_subprocess_thunk()
2849 self._scripts_to_cleanup.append(self._signal_script)
2850 cmd_start = [
2851 'logman.exe',
2852 'start',
2853 'NT Kernel Logger',
2854 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
2855 # splitio,fileiocompletion,syscall,file,cswitch,img
2856 '(process,fileio,thread)',
2857 '-o', self._logname + '.etl',
2858 '-ets', # Send directly to kernel
2859 # Values extracted out of thin air.
2860 # Event Trace Session buffer size in kb.
2861 '-bs', '10240',
2862 # Number of Event Trace Session buffers.
2863 '-nb', '16', '256',
2864 ]
2865 logging.debug('Running: %s' % cmd_start)
2866 try:
2867 subprocess.check_call(
2868 cmd_start,
2869 stdin=subprocess.PIPE,
2870 stdout=subprocess.PIPE,
2871 stderr=subprocess.STDOUT)
2872 except subprocess.CalledProcessError, e:
2873 if e.returncode == -2147024891:
2874 print >> sys.stderr, 'Please restart with an elevated admin prompt'
2875 elif e.returncode == -2144337737:
2876 print >> sys.stderr, (
2877 'A kernel trace was already running, stop it and try again')
2878 raise
2879
2880 def trace(self, cmd, cwd, tracename, output):
2881 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
2882 assert os.path.isabs(cmd[0]), cmd[0]
2883 assert os.path.isabs(cwd), cwd
2884 assert os.path.normpath(cwd) == cwd, cwd
2885 with self._lock:
2886 if not self._initialized:
2887 raise TracingFailure(
2888 'Called Tracer.trace() on an uninitialized object',
2889 None, None, None, tracename)
2890 assert tracename not in (i['trace'] for i in self._traces)
2891
2892 # Use "logman -?" for help.
2893
2894 stdout = stderr = None
2895 if output:
2896 stdout = subprocess.PIPE
2897 stderr = subprocess.STDOUT
2898
2899 # Run the child process.
2900 logging.debug('Running: %s' % cmd)
2901 # Use the temporary script generated with create_subprocess_thunk() so we
2902 # have a clear pid owner. Since trace_inputs.py can be used as a library
2903 # and could trace multiple processes simultaneously, it would be more
2904 # complex if the executable to be traced were executed directly here. It
2905 # also solves issues related to logman.exe, which needs to be executed to
2906 # control the kernel trace.
2907 child_cmd = [
2908 sys.executable,
2909 self._signal_script,
2910 tracename,
2911 ]
2912 child = subprocess.Popen(
2913 child_cmd + tools.fix_python_path(cmd),
2914 cwd=cwd,
2915 stdin=subprocess.PIPE,
2916 stdout=stdout,
2917 stderr=stderr)
2918 logging.debug('Started child pid: %d' % child.pid)
2919 out = child.communicate()[0]
2920 # This doesn't mean all the grand-children are done. Sadly, we don't have
2921 # a good way to determine that.
2922
2923 with self._lock:
2924 assert tracename not in (i['trace'] for i in self._traces)
2925 self._traces.append({
2926 'cmd': cmd,
2927 'cwd': cwd,
2928 'output': out,
2929 'pid': child.pid,
2930 # Used to figure out the real process when process ids are reused.
2931 'thunk_cmd': child_cmd,
2932 'trace': tracename,
2933 })
2934
2935 return child.returncode, out
2936
2937 def close(self, _timeout=None):
2938 """Stops the kernel log collection and converts the traces to text
2939 representation.
2940 """
2941 with self._lock:
2942 try:
2943 super(LogmanTrace.Tracer, self).close()
2944 finally:
2945 cmd_stop = [
2946 'logman.exe',
2947 'stop',
2948 'NT Kernel Logger',
2949 '-ets', # Sends the command directly to the kernel.
2950 ]
2951 logging.debug('Running: %s' % cmd_stop)
2952 subprocess.check_call(
2953 cmd_stop,
2954 stdin=subprocess.PIPE,
2955 stdout=subprocess.PIPE,
2956 stderr=subprocess.STDOUT)
2957
2958 def post_process_log(self):
2959 """Converts the .etl file into .csv then into .json."""
2960 super(LogmanTrace.Tracer, self).post_process_log()
2961 logformat = 'csv'
2962 self._convert_log(logformat)
2963 self._trim_log(logformat)
2964
2965 def _gen_logdata(self):
2966 return {
2967 'format': 'csv',
2968 'traces': self._traces,
2969 }
2970
2971 def _trim_log(self, logformat):
2972 """Reduces the amount of data in original log by generating a 'reduced'
2973 log.
2974 """
2975 if logformat == 'csv_utf16':
2976 file_handle = codecs.open(
2977 self._logname + '.' + logformat, 'r', encoding='utf-16')
2978
2979 elif logformat == 'csv':
2980 assert sys.getfilesystemencoding() == 'mbcs'
2981 file_handle = codecs.open(
2982 self._logname + '.' + logformat, 'r',
2983 encoding=get_current_encoding())
2984
2985 supported_events = LogmanTrace.Context.supported_events()
2986
2987 def trim(generator):
2988 """Loads items from the generator and returns the interesting data.
2989
2990 It filters out any uninteresting lines and reduces the amount of data in
2991 the trace.
2992 """
2993 for index, line in enumerate(generator):
2994 if not index:
2995 if line != self.EXPECTED_HEADER:
2996 raise TracingFailure(
2997 'Found malformed header: %s' % line,
2998 None, None, None)
2999 continue
3000 # As you can see, the CSV is full of useful non-redundant information:
3001 if (line[2] != '0' or # Event ID
3002 line[3] not in ('2', '3') or # Version
3003 line[4] != '0' or # Channel
3004 line[5] != '0' or # Level
3005 line[7] != '0' or # Task
3006 line[8] != '0x0000000000000000' or # Keyword
3007 line[12] != '' or # Instance ID
3008 line[13] != '' or # Parent Instance ID
3009 line[14] != self.NULL_GUID or # Activity ID
3010 line[15] != ''): # Related Activity ID
3011 raise TracingFailure(
3012 'Found unexpected values in line: %s' % ' '.join(line),
3013 None, None, None)
3014
3015 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
3016 continue
3017
3018 yield [
3019 line[self.EVENT_NAME],
3020 line[self.TYPE],
3021 line[self.PID],
3022 line[self.TID],
3023 line[self.PROCESSOR_ID],
3024 line[self.TIMESTAMP],
3025 ] + line[self.USER_DATA:]
3026
3027 # Must not convert the trim() call into a list, since it would use too much
3028 # memory for large traces. Use a csv file as a workaround since the json
3029 # parser requires a complete in-memory file.
3030 with open('%s.preprocessed' % self._logname, 'wb') as f:
3031 # $ and * can't be used in file names on Windows, reducing the likelihood
3032 # of having to escape a string.
3033 out = csv.writer(
3034 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3035 for line in trim(self.CsvReader(file_handle)):
3036 out.writerow([s.encode('utf-8') for s in line])
3037
3038 def _convert_log(self, logformat):
3039 """Converts the ETL trace to text representation.
3040
3041 Normally, 'csv' is sufficient. If complex scripts are used (like eastern
3042 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.
3043
3044 Arguments:
3045 - logformat: Text format to be generated, csv, csv_utf16 or xml.
3046
3047 Use "tracerpt -?" for help.
3048 """
3049 LOCALE_INVARIANT = 0x7F
3050 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
3051 cmd_convert = [
3052 'tracerpt.exe',
3053 '-l', self._logname + '.etl',
3054 '-o', self._logname + '.' + logformat,
3055 '-gmt', # Use UTC
3056 '-y', # No prompt
3057 # Use -of XML to get the header of each item after column 19, e.g. all
3058 # the actual headers of 'User Data'.
3059 ]
3060
3061 if logformat == 'csv':
3062 # tracerpt localizes the 'Type' column, for major brainfuck
3063 # entertainment. I can't imagine any sane reason to do that.
3064 cmd_convert.extend(['-of', 'CSV'])
3065 elif logformat == 'csv_utf16':
3066 # This causes it to use UTF-16, which doubles the log size but ensures
3067 # the log is readable for non-ASCII characters.
3068 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
3069 elif logformat == 'xml':
3070 cmd_convert.extend(['-of', 'XML'])
3071 else:
3072 raise ValueError('Unexpected log format \'%s\'' % logformat)
3073 logging.debug('Running: %s' % cmd_convert)
3074 # This can take tens of minutes for large logs.
3075 # Redirects all output to stderr.
3076 subprocess.check_call(
3077 cmd_convert,
3078 stdin=subprocess.PIPE,
3079 stdout=sys.stderr,
3080 stderr=sys.stderr)
3081
3082 def __init__(self, use_sudo=False): # pylint: disable=W0613
3083 super(LogmanTrace, self).__init__()
3084 # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3085
3086 @staticmethod
3087 def clean_trace(logname):
3088 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'):
3089 if os.path.isfile(logname + ext):
3090 os.remove(logname + ext)
3091
3092 @classmethod
3093 def parse_log(cls, logname, blacklist, trace_name):
3094 logging.info('parse_log(%s, ..., %s)', logname, trace_name)
3095 assert os.path.isabs(logname)
3096
3097 def blacklist_more(filepath):
3098 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
3099 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)
3100
3101 # Create a list of (Context, result_dict) tuples. This is necessary because
3102 # the csv file may be larger than the amount of available memory.
3103 contexes = [
3104 (
3105 cls.Context(
3106 blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
3107 {
3108 'output': item['output'],
3109 'trace': item['trace'],
3110 },
3111 )
3112 for item in read_json(logname)['traces']
3113 if not trace_name or item['trace'] == trace_name
3114 ]
3115
3116 # The log may be too large to fit in memory and it is not efficient to read
3117 # it multiple times, so multiplex the contexes instead, which is slightly
3118 # more awkward.
3119 with open('%s.preprocessed' % logname, 'rb') as f:
3120 lines = csv.reader(
3121 f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
3122 for encoded in lines:
3123 line = [s.decode('utf-8') for s in encoded]
3124 # Convert the PID in-place from hex.
3125 line[cls.Context.PID] = int(line[cls.Context.PID], 16)
3126 for context in contexes:
3127 if 'exception' in context[1]:
3128 continue
3129 try:
3130 context[0].on_line(line)
3131 except TracingFailure:
3132 context[1]['exception'] = sys.exc_info()
3133
3134 for context in contexes:
3135 if 'exception' in context[1]:
3136 continue
3137 context[1]['results'] = context[0].to_results()
3138
3139 return [context[1] for context in contexes]
3140
3141
3142 def get_api(**kwargs):
3143 """Returns the correct implementation for the current OS."""
3144 if sys.platform == 'cygwin':
3145 raise NotImplementedError(
3146 'Not implemented for cygwin, start the script from Win32 python')
3147 flavors = {
3148 'win32': LogmanTrace,
3149 'darwin': Dtrace,
3150 'sunos5': Dtrace,
3151 'freebsd7': Dtrace,
3152 'freebsd8': Dtrace,
3153 }
3154 # Defaults to strace.
3155 return flavors.get(sys.platform, Strace)(**kwargs)
3156
3157
3158 def extract_directories(root_dir, files, blacklist):
3159 """Detects if all the files in a directory are in |files| and if so, replace
3160 the individual files by a Results.Directory instance.
3161
3162 Takes a list of Results.File instances and returns a shorter list of
3163 Results.File and Results.Directory instances.
3164
3165 Arguments:
3166 - root_dir: Optional base directory that shouldn't be searched further.
3167 - files: list of Results.File instances.
3168 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
3169 """
3170 logging.info(
3171 'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
3172 assert not (root_dir or '').endswith(os.path.sep), root_dir
3173 # It is important for root_dir to not be a symlinked path, make sure to call
3174 # os.path.realpath() as needed.
3175 assert not root_dir or (
3176 os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
3177 assert not any(isinstance(f, Results.Directory) for f in files)
3178 # Remove non existent files.
3179 files = [f for f in files if f.existent]
3180 if not files:
3181 return files
3182 # All files must share the same root, which can be None.
3183 assert len(set(f.root for f in files)) == 1, set(f.root for f in files)
3184
3185 # Creates a {directory: {filename: File}} mapping, up to root.
3186 buckets = {}
3187 if root_dir:
3188 buckets[root_dir] = {}
3189 for fileobj in files:
3190 path = fileobj.full_path
3191 directory = os.path.dirname(path)
3192 assert directory
3193 # Do not use os.path.basename() so trailing os.path.sep is kept.
3194 basename = path[len(directory)+1:]
3195 files_in_directory = buckets.setdefault(directory, {})
3196 files_in_directory[basename] = fileobj
3197 # Add all the directories recursively up to root.
3198 while True:
3199 old_d = directory
3200 directory = os.path.dirname(directory)
3201 if directory + os.path.sep == root_dir or directory == old_d:
3202 break
3203 buckets.setdefault(directory, {})
3204
3205 root_prefix = len(root_dir) + 1 if root_dir else 0
3206 for directory in sorted(buckets, reverse=True):
3207 if not os.path.isdir(directory):
3208 logging.debug(
3209 '%s was a directory but doesn\'t exist anymore; ignoring', directory)
3210 continue
3211 actual = set(f for f in os.listdir(directory) if not blacklist(f))
3212 expected = set(buckets[directory])
3213 if not (actual - expected):
3214 parent = os.path.dirname(directory)
3215 buckets[parent][os.path.basename(directory)] = Results.Directory(
3216 root_dir,
3217 directory[root_prefix:],
3218 False,
3219 sum(f.size for f in buckets[directory].itervalues()),
3220 sum(f.nb_files for f in buckets[directory].itervalues()))
3221 # Remove the whole bucket.
3222 del buckets[directory]
3223
3224 # Reverse the mapping with what remains. The original instances are returned,
3225 # so the cached meta data is kept.
3226 files = sum((x.values() for x in buckets.itervalues()), [])
3227 return sorted(files, key=lambda x: x.path)
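# A short worked example of the collapsing above (paths illustrative): if the
# trace recorded /src/a/1.py and /src/a/2.py and os.listdir('/src/a'), after
# the blacklist, returns exactly {'1.py', '2.py'}, both Results.File entries
# are replaced by a single Results.Directory for 'a'; if the directory also
# contains an untraced '3.py', the individual files are kept as-is.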
3228
3229
3230 def trace(logfile, cmd, cwd, api, output):
3231 """Traces an executable. Returns (returncode, output) from api.
3232
3233 Arguments:
3234 - logfile: file to write to.
3235 - cmd: command to run.
3236 - cwd: current directory to start the process in.
3237 - api: a tracing api instance.
3238 - output: if True, returns output, otherwise prints it at the console.
3239 """
3240 cmd = tools.fix_python_path(cmd)
3241 api.clean_trace(logfile)
3242 with api.get_tracer(logfile) as tracer:
3243 return tracer.trace(cmd, cwd, 'default', output)
3244
3245
3246 def CMDclean(parser, args):
3247 """Cleans up traces."""
3248 options, args = parser.parse_args(args)
3249 api = get_api()
3250 api.clean_trace(options.log)
3251 return 0
3252
3253
3254 def CMDtrace(parser, args):
3255 """Traces an executable."""
3256 parser.allow_interspersed_args = False
3257 parser.add_option(
3258 '-q', '--quiet', action='store_true',
3259 help='Redirects traced executable output to /dev/null')
3260 parser.add_option(
3261 '-s', '--sudo', action='store_true',
3262 help='Use sudo when shelling out the tracer tool (ignored on Windows)')
3263 parser.add_option(
3264 '-n', '--no-sudo', action='store_false',
3265 help='Don\'t use sudo')
3266 options, args = parser.parse_args(args)
3267
3268 if not args:
3269 parser.error('Please provide a command to run')
3270
3271 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
3272 args[0] = os.path.abspath(args[0])
3273
3274 # options.sudo's default value is None, which means to do whatever the
3275 # tracer's default is.
3276 api = get_api(use_sudo=options.sudo)
3277 return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3278
3279
3280 def CMDread(parser, args):
3281 """Reads the logs and prints the result."""
3282 parser.add_option(
3283 '-V', '--variable',
3284 nargs=2,
3285 action='append',
3286 dest='variables',
3287 metavar='VAR_NAME directory',
3288 default=[],
3289 help=('Variables to replace relative directories against. Example: '
3290 '"-v \'$HOME\' \'/home/%s\'" will replace all occurence of your '
3291 'home dir with $HOME') % getpass.getuser())
3292 parser.add_option(
3293 '--root-dir',
3294 help='Root directory to base everything off of. Anything outside of '
3295 'this directory will not be reported')
3296 parser.add_option(
3297 '--trace-name',
3298 help='Only reads one of the traces. Defaults to reading all traces')
3299 parser.add_option(
3300 '-j', '--json', action='store_true',
3301 help='Outputs raw result data as json')
3302 parser.add_option(
3303 '--trace-blacklist', action='append', default=[],
3304 help='List of regexp to use as blacklist filter')
3305 options, args = parser.parse_args(args)
3306
3307 if options.root_dir:
3308 options.root_dir = file_path.get_native_path_case(
3309 unicode(os.path.abspath(options.root_dir)))
3310
3311 variables = dict(options.variables)
3312 api = get_api()
3313 blacklist = gen_blacklist(options.trace_blacklist)
3314 data = api.parse_log(options.log, blacklist, options.trace_name)
3315 # Process each trace.
3316 output_as_json = []
3317 try:
3318 for item in data:
3319 if 'exception' in item:
3320 # Do not abort the other traces.
3321 print >> sys.stderr, (
3322 'Trace %s: Got an exception: %s' % (
3323 item['trace'], item['exception'][1]))
3324 continue
3325 results = item['results']
3326 if options.root_dir:
3327 results = results.strip_root(options.root_dir)
3328
3329 if options.json:
3330 output_as_json.append(results.flatten())
3331 else:
3332 simplified = extract_directories(
3333 options.root_dir, results.files, blacklist)
3334 simplified = [f.replace_variables(variables) for f in simplified]
3335 if len(data) > 1:
3336 print('Trace: %s' % item['trace'])
3337 print('Total: %d' % len(results.files))
3338 print('Non existent: %d' % len(results.non_existent))
3339 for f in results.non_existent:
3340 print(' %s' % f.path)
3341 print(
3342 'Interesting: %d reduced to %d' % (
3343 len(results.existent), len(simplified)))
3344 for f in simplified:
3345 print(' %s' % f.path)
3346
3347 if options.json:
3348 write_json(sys.stdout, output_as_json, False)
3349 except KeyboardInterrupt:
3350 return 1
3351 except IOError as e:
3352 if e.errno == errno.EPIPE:
3353 # Do not print a stack trace when the output is piped to less and the user
3354 # quits before the whole output was written.
3355 return 1
3356 raise
3357 return 0
3358
3359
3360 class OptionParserTraceInputs(tools.OptionParserWithLogging):
3361 """Adds automatic --log handling."""
3362
3363 # Disable --log-file options since both --log and --log-file options are
3364 # confusing.
3365 # TODO(vadimsh): Rename --log-file or --log to something else.
3366 enable_log_file = False
3367
3368 def __init__(self, **kwargs):
3369 tools.OptionParserWithLogging.__init__(self, **kwargs)
3370 self.add_option(
3371 '-l', '--log', help='Log file to generate or read, required')
3372
3373 def parse_args(self, *args, **kwargs):
3374 """Makes sure the paths make sense.
3375
3376 On Windows, / and \ are often mixed together in a path.
3377 """
3378 options, args = tools.OptionParserWithLogging.parse_args(
3379 self, *args, **kwargs)
3380 if not options.log:
3381 self.error('Must supply a log file with -l')
3382 options.log = os.path.abspath(options.log)
3383 return options, args
3384
3385
3386 def main(argv):
3387 dispatcher = subcommand.CommandDispatcher(__name__)
3388 try:
3389 return dispatcher.execute(
3390 OptionParserTraceInputs(version=__version__), argv)
3391 except TracingFailure, e:
3392 sys.stderr.write('\nError: ')
3393 sys.stderr.write(str(e))
3394 sys.stderr.write('\n')
3395 return 1
3396
3397
3398 if __name__ == '__main__':
3399 fix_encoding.fix_encoding()
3400 tools.disable_buffering()
3401 colorama.init()
3402 sys.exit(main(sys.argv[1:]))