OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # coding=utf-8 |
| 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. |
| 6 |
| 7 """Traces an executable and its child processes and extract the files accessed |
| 8 by them. |
| 9 |
| 10 The implementation uses OS-specific APIs. The native kernel logger and the ETL |
| 11 interface are used on Windows. DTrace is used on OSX. strace is used otherwise. |
| 12 The OS-specific implementation is hidden in an 'API' interface. |
| 13 |
| 14 The results are embedded in a Results instance. The tracing is done in two |
| 15 phases: the first runs the actual trace and generates an |
| 16 implementation-specific log file. The log file is then parsed to extract the |
| 17 information, including the individual child processes and the files they |
| 18 accessed, from the log. |
| 19 """ |
| 20 |
| 21 import codecs |
| 22 import csv |
| 23 import getpass |
| 24 import glob |
| 25 import json |
| 26 import logging |
| 27 import optparse |
| 28 import os |
| 29 import re |
| 30 import subprocess |
| 31 import sys |
| 32 import tempfile |
| 33 import threading |
| 34 import time |
| 35 import weakref |
| 36 |
| 37 ## OS-specific imports |
| 38 |
| 39 if sys.platform == 'win32': |
| 40 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p |
| 41 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611 |
| 42 from ctypes.wintypes import GetLastError # pylint: disable=E0611 |
| 43 elif sys.platform == 'darwin': |
| 44 import Carbon.File # pylint: disable=F0401 |
| 45 import MacOS # pylint: disable=F0401 |
| 46 |
| 47 |
| 48 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
| 49 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) |
| 50 |
| 51 |
| 52 class TracingFailure(Exception): |
| 53 """An exception occured during tracing.""" |
| 54 def __init__(self, description, pid, line_number, line, *args): |
| 55 super(TracingFailure, self).__init__( |
| 56 description, pid, line_number, line, *args) |
| 57 self.description = description |
| 58 self.pid = pid |
| 59 self.line_number = line_number |
| 60 self.line = line |
| 61 self.extra = args |
| 62 |
| 63 def __str__(self): |
| 64 out = self.description |
| 65 if self.pid: |
| 66 out += '\npid: %d' % self.pid |
| 67 if self.line_number: |
| 68 out += '\nline: %d' % self.line_number |
| 69 if self.line: |
| 70 out += '\n%s' % self.line |
| 71 if self.extra: |
| 72 out += '\n' + ', '.join(map(str, filter(None, self.extra))) |
| 73 return out |
| 74 |
| 75 |
| 76 ## OS-specific functions |
| 77 |
| 78 if sys.platform == 'win32': |
| 79 def QueryDosDevice(drive_letter): |
| 80 """Returns the Windows 'native' path for a DOS drive letter.""" |
| 81 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter |
| 82 # Guesswork. QueryDosDeviceW never returns the required number of bytes. |
| 83 chars = 1024 |
| 84 drive_letter = unicode(drive_letter) |
| 85 p = create_unicode_buffer(chars) |
| 86 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars): |
| 87 err = GetLastError() |
| 88 if err: |
| 89 # pylint: disable=E0602 |
| 90 raise WindowsError( |
| 91 err, |
| 92 'QueryDosDevice(%s): %s (%d)' % ( |
| 93 str(drive_letter), FormatError(err), err)) |
| 94 return p.value |
| 95 |
| 96 |
| 97 def GetShortPathName(long_path): |
| 98 """Returns the Windows short path equivalent for a 'long' path.""" |
| 99 long_path = unicode(long_path) |
| 100 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is |
| 101 # not enforced. |
| 102 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'): |
| 103 long_path = '\\\\?\\' + long_path |
| 104 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0) |
| 105 if chars: |
| 106 p = create_unicode_buffer(chars) |
| 107 if windll.kernel32.GetShortPathNameW(long_path, p, chars): |
| 108 return p.value |
| 109 |
| 110 err = GetLastError() |
| 111 if err: |
| 112 # pylint: disable=E0602 |
| 113 raise WindowsError( |
| 114 err, |
| 115 'GetShortPathName(%s): %s (%d)' % ( |
| 116 str(long_path), FormatError(err), err)) |
| 117 |
| 118 |
| 119 def GetLongPathName(short_path): |
| 120 """Returns the Windows long path equivalent for a 'short' path.""" |
| 121 short_path = unicode(short_path) |
| 122 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is |
| 123 # not enforced. |
| 124 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'): |
| 125 short_path = '\\\\?\\' + short_path |
| 126 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0) |
| 127 if chars: |
| 128 p = create_unicode_buffer(chars) |
| 129 if windll.kernel32.GetLongPathNameW(short_path, p, chars): |
| 130 return p.value |
| 131 |
| 132 err = GetLastError() |
| 133 if err: |
| 134 # pylint: disable=E0602 |
| 135 raise WindowsError( |
| 136 err, |
| 137 'GetLongPathName(%s): %s (%d)' % ( |
| 138 str(short_path), FormatError(err), err)) |
| 139 |
| 140 |
| 141 def get_current_encoding(): |
| 142 """Returns the 'ANSI' code page associated to the process.""" |
| 143 return 'cp%d' % int(windll.kernel32.GetACP()) |
| 144 |
| 145 |
| 146 class DosDriveMap(object): |
| 147 """Maps \Device\HarddiskVolumeN to N: on Windows.""" |
| 148 # Keep one global cache. |
| 149 _MAPPING = {} |
| 150 |
| 151 def __init__(self): |
| 152 """Lazy loads the cache.""" |
| 153 if not self._MAPPING: |
| 154 # This is related to the UNC resolver on Windows. Ignore it. |
| 155 self._MAPPING['\\Device\\Mup'] = None |
| 156 self._MAPPING['\\SystemRoot'] = os.environ['SystemRoot'] |
| 157 |
| 158 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)): |
| 159 try: |
| 160 letter = '%s:' % letter |
| 161 mapped = QueryDosDevice(letter) |
| 162 if mapped in self._MAPPING: |
| 163 logging.warn( |
| 164 ('Two drives, \'%s\' and \'%s\', are mapped to the same disk' |
| 165 '. Drive letters are a user-mode concept and the kernel ' |
| 166 'traces only have NT paths, so all accesses will be ' |
| 167 'associated with the first drive letter, independent of the ' |
| 168 'actual letter used by the code') % ( |
| 169 self._MAPPING[mapped], letter)) |
| 170 else: |
| 171 self._MAPPING[mapped] = letter |
| 172 except WindowsError: # pylint: disable=E0602 |
| 173 pass |
| 174 |
| 175 def to_win32(self, path): |
| 176 """Converts a native NT path to Win32/DOS compatible path.""" |
| 177 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path) |
| 178 if not match: |
| 179 raise ValueError( |
| 180 'Can\'t convert %s into a Win32 compatible path' % path, |
| 181 path) |
| 182 if not match.group(1) in self._MAPPING: |
| 183 # Unmapped partitions may be accessed by Windows for the |
| 184 # fun of it while the test is running. Discard these. |
| 185 return None |
| 186 drive = self._MAPPING[match.group(1)] |
| 187 if not drive or not match.group(2): |
| 188 return drive |
| 189 return drive + match.group(2) |
| 190 |
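A minimal sketch of how the mapping is meant to be used (editor's example, not part of the patch; the HarddiskVolume1 -> C: association below is an assumption, the real mapping comes from QueryDosDevice() at runtime):
# mapper = DosDriveMap()
# mapper.to_win32('\\Device\\HarddiskVolume1\\Windows\\notepad.exe')
#   -> 'C:\\Windows\\notepad.exe'   (assuming HarddiskVolume1 is mapped to C:)
# mapper.to_win32('\\Device\\HarddiskVolume9\\foo')
#   -> None   (volume without a drive letter; such accesses are discarded)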
| 191 |
| 192 def isabs(path): |
| 193 """Accepts X: as an absolute path, unlike python's os.path.isabs().""" |
| 194 return os.path.isabs(path) or len(path) == 2 and path[1] == ':' |
| 195 |
| 196 |
| 197 def get_native_path_case(p): |
| 198 """Returns the native path case for an existing file. |
| 199 |
| 200 On Windows, removes any leading '\\?\'. |
| 201 """ |
| 202 if not isabs(p): |
| 203 raise ValueError( |
| 204 'Can\'t get native path case for a non-absolute path: %s' % p, |
| 205 p) |
| 206 # Windows used to have an option to turn on case sensitivity on the non-Win32 |
| 207 # subsystems, but that's out of scope here and isn't supported anymore. |
| 208 # Go figure why GetShortPathName() is needed. |
| 209 try: |
| 210 out = GetLongPathName(GetShortPathName(p)) |
| 211 except OSError, e: |
| 212 if e.args[0] in (2, 3, 5): |
| 213 # The path does not exist. Try to recurse and reconstruct the path. |
| 214 base = os.path.dirname(p) |
| 215 rest = os.path.basename(p) |
| 216 return os.path.join(get_native_path_case(base), rest) |
| 217 raise |
| 218 if out.startswith('\\\\?\\'): |
| 219 out = out[4:] |
| 220 # Always upper case the first letter since GetLongPathName() will return the |
| 221 # drive letter in the case it was given. |
| 222 return out[0].upper() + out[1:] |
| 223 |
| 224 |
| 225 def CommandLineToArgvW(command_line): |
| 226 """Splits a commandline into argv using CommandLineToArgvW().""" |
| 227 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx |
| 228 size = c_int() |
| 229 ptr = windll.shell32.CommandLineToArgvW(unicode(command_line), byref(size)) |
| 230 try: |
| 231 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)] |
| 232 finally: |
| 233 windll.kernel32.LocalFree(ptr) |
| 234 |
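For reference, a hedged illustration of the splitting done by the Win32 CommandLineToArgvW() call wrapped above (the command line is illustrative):
# CommandLineToArgvW(u'child.exe "foo bar" baz')
#   -> [u'child.exe', u'foo bar', u'baz']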
| 235 |
| 236 elif sys.platform == 'darwin': |
| 237 |
| 238 |
| 239 # On non-Windows, keep the stdlib behavior. |
| 240 isabs = os.path.isabs |
| 241 |
| 242 |
| 243 def _find_item_native_case(root_path, item): |
| 244 """Gets the native path case of a single item based at root_path. |
| 245 |
| 246 There is no API to get the native path case of symlinks on OSX. So it |
| 247 needs to be done the slow way. |
| 248 """ |
| 249 item = item.lower() |
| 250 for element in os.listdir(root_path): |
| 251 if element.lower() == item: |
| 252 return element |
| 253 |
| 254 |
| 255 def _native_case(p): |
| 256 """Gets the native path case. Warning: this function resolves symlinks.""" |
| 257 logging.debug('native_case(%s)' % p) |
| 258 try: |
| 259 rel_ref, _ = Carbon.File.FSPathMakeRef(p) |
| 260 out = rel_ref.FSRefMakePath() |
| 261 if p.endswith(os.path.sep) and not out.endswith(os.path.sep): |
| 262 return out + os.path.sep |
| 263 return out |
| 264 except MacOS.Error, e: |
| 265 if e.args[0] in (-43, -120): |
| 266 # The path does not exist. Try to recurse and reconstruct the path. |
| 267 # -43 means file not found. |
| 268 # -120 means directory not found. |
| 269 base = os.path.dirname(p) |
| 270 rest = os.path.basename(p) |
| 271 return os.path.join(_native_case(base), rest) |
| 272 raise OSError( |
| 273 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1]) |
| 274 |
| 275 |
| 276 def _split_at_symlink_native(base_path, rest): |
| 277 """Returns the native path for a symlink.""" |
| 278 base, symlink, rest = split_at_symlink(base_path, rest) |
| 279 if symlink: |
| 280 if not base_path: |
| 281 base_path = base |
| 282 else: |
| 283 base_path = safe_join(base_path, base) |
| 284 symlink = _find_item_native_case(base_path, symlink) |
| 285 return base, symlink, rest |
| 286 |
| 287 |
| 288 def get_native_path_case(path): |
| 289 """Returns the native path case for an existing file. |
| 290 |
| 291 Technically, it's only HFS+ on OSX that is case preserving and |
| 292 insensitive. It's the default setting on HFS+ but can be changed. |
| 293 """ |
| 294 if not isabs(path): |
| 295 raise ValueError( |
| 296 'Can\'t get native path case for a non-absolute path: %s' % path, |
| 297 path) |
| 298 if path.startswith('/dev'): |
| 299 # /dev is not visible from Carbon, causing an exception. |
| 300 return path |
| 301 |
| 302 # Starts assuming there is no symlink along the path. |
| 303 resolved = _native_case(path) |
| 304 if resolved.lower() == path.lower(): |
| 305 # This code path is much faster. |
| 306 return resolved |
| 307 |
| 308 # There was a symlink, process it. |
| 309 base, symlink, rest = _split_at_symlink_native(None, path) |
| 310 assert symlink, (path, base, symlink, rest, resolved) |
| 311 prev = base |
| 312 base = safe_join(_native_case(base), symlink) |
| 313 assert len(base) > len(prev) |
| 314 while rest: |
| 315 prev = base |
| 316 relbase, symlink, rest = _split_at_symlink_native(base, rest) |
| 317 base = safe_join(base, relbase) |
| 318 assert len(base) > len(prev), (prev, base, symlink) |
| 319 if symlink: |
| 320 base = safe_join(base, symlink) |
| 321 assert len(base) > len(prev), (prev, base, symlink) |
| 322 # Make sure no symlink was resolved. |
| 323 assert base.lower() == path.lower(), (base, path) |
| 324 return base |
| 325 |
| 326 |
| 327 else: # OSes other than Windows and OSX. |
| 328 |
| 329 |
| 330 # On non-Windows, keep the stdlib behavior. |
| 331 isabs = os.path.isabs |
| 332 |
| 333 |
| 334 def get_native_path_case(path): |
| 335 """Returns the native path case for an existing file. |
| 336 |
| 337 On OSes other than OSX and Windows, assume the file system is |
| 338 case-sensitive. |
| 339 |
| 340 TODO(maruel): This is not strictly true. Implement if necessary. |
| 341 """ |
| 342 if not isabs(path): |
| 343 raise ValueError( |
| 344 'Can\'t get native path case for a non-absolute path: %s' % path, |
| 345 path) |
| 346 # Give up on cygwin, as GetLongPathName() can't be called. |
| 347 # Linux traces tend not to be normalized, so take this opportunity to |
| 348 # normalize the path. The implementations for the other OSes already |
| 349 # normalize it, so it needs to be done here too to be coherent across OSes. |
| 350 out = os.path.normpath(path) |
| 351 if path.endswith(os.path.sep) and not out.endswith(os.path.sep): |
| 352 return out + os.path.sep |
| 353 return out |
| 354 |
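On these OSes the function boils down to path normalization; a small sketch of the expected behavior (editor's example):
# get_native_path_case('/tmp/./foo//bar') -> '/tmp/foo/bar'
# get_native_path_case('/tmp/foo/')       -> '/tmp/foo/'   (trailing separator preserved)
# get_native_path_case('relative/path')   -> raises ValueError (path must be absolute)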
| 355 |
| 356 if sys.platform != 'win32': # All non-Windows OSes. |
| 357 |
| 358 |
| 359 def safe_join(*args): |
| 360 """Joins path elements like os.path.join() but doesn't abort on absolute |
| 361 path. |
| 362 |
| 363 os.path.join('foo', '/bar') == '/bar' |
| 364 but safe_join('foo', '/bar') == 'foo/bar'. |
| 365 """ |
| 366 out = '' |
| 367 for element in args: |
| 368 if element.startswith(os.path.sep): |
| 369 if out.endswith(os.path.sep): |
| 370 out += element[1:] |
| 371 else: |
| 372 out += element |
| 373 else: |
| 374 if out.endswith(os.path.sep): |
| 375 out += element |
| 376 else: |
| 377 out += os.path.sep + element |
| 378 return out |
| 379 |
| 380 |
| 381 def split_at_symlink(base_dir, relfile): |
| 382 """Scans each component of relfile and cut the string at the symlink if |
| 383 there is any. |
| 384 |
| 385 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if |
| 386 no symlink was found. |
| 387 """ |
| 388 if base_dir: |
| 389 assert relfile |
| 390 assert os.path.isabs(base_dir) |
| 391 index = 0 |
| 392 else: |
| 393 assert os.path.isabs(relfile) |
| 394 index = 1 |
| 395 |
| 396 def at_root(rest): |
| 397 if base_dir: |
| 398 return safe_join(base_dir, rest) |
| 399 return rest |
| 400 |
| 401 while True: |
| 402 try: |
| 403 index = relfile.index(os.path.sep, index) |
| 404 except ValueError: |
| 405 index = len(relfile) |
| 406 full = at_root(relfile[:index]) |
| 407 if os.path.islink(full): |
| 408 # A symlink! |
| 409 base = os.path.dirname(relfile[:index]) |
| 410 symlink = os.path.basename(relfile[:index]) |
| 411 rest = relfile[index:] |
| 412 logging.debug( |
| 413 'split_at_symlink(%s, %s) -> (%s, %s, %s)' % |
| 414 (base_dir, relfile, base, symlink, rest)) |
| 415 return base, symlink, rest |
| 416 if index == len(relfile): |
| 417 break |
| 418 index += 1 |
| 419 return relfile, None, None |
| 420 |
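A hedged illustration of the return value (editor's sketch; it assumes '/base/lib' is a symlink on the local file system):
# split_at_symlink('/base', 'lib/libc.so') -> ('', 'lib', '/libc.so')
# split_at_symlink('/base', 'src/main.c')  -> ('src/main.c', None, None)   (no symlink found)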
| 421 |
| 422 def fix_python_path(cmd): |
| 423 """Returns the fixed command line to call the right python executable.""" |
| 424 out = cmd[:] |
| 425 if out[0] == 'python': |
| 426 out[0] = sys.executable |
| 427 elif out[0].endswith('.py'): |
| 428 out.insert(0, sys.executable) |
| 429 return out |
| 430 |
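Editor's example of the two rewrite cases (results reference sys.executable rather than literal paths):
# fix_python_path(['python', 'build.py', '-v'])  -> [sys.executable, 'build.py', '-v']
# fix_python_path(['build.py', '-v'])            -> [sys.executable, 'build.py', '-v']
# fix_python_path(['/bin/ls', '-l'])             -> ['/bin/ls', '-l']   (unchanged)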
| 431 |
| 432 def create_thunk(): |
| 433 handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py') |
| 434 os.write( |
| 435 handle, |
| 436 ( |
| 437 'import subprocess\n' |
| 438 'import sys\n' |
| 439 'sys.exit(subprocess.call(sys.argv[2:]))\n' |
| 440 )) |
| 441 os.close(handle) |
| 442 return name |
| 443 |
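The generated thunk simply re-execs sys.argv[2:] and ignores sys.argv[1]; a hedged usage sketch (what the real caller puts in argv[1], e.g. a log file path, is an assumption here):
# thunk = create_thunk()
# subprocess.call([sys.executable, thunk, '/tmp/ignored.log', '/bin/echo', 'hi'])
# os.remove(thunk)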
| 444 |
| 445 def strace_process_quoted_arguments(text): |
| 446 """Extracts quoted arguments on a string and return the arguments as a list. |
| 447 |
| 448 Implemented as an automaton. Supports incomplete strings in the form |
| 449 '"foo"...'. |
| 450 |
| 451 Example: |
| 452 With text = '"foo", "bar"', the function will return ['foo', 'bar'] |
| 453 |
| 454 TODO(maruel): Implement escaping. |
| 455 """ |
| 456 # All the possible states of the DFA. |
| 457 ( NEED_QUOTE, # Beginning of a new argument. |
| 458 INSIDE_STRING, # Inside an argument. |
| 459 ESCAPED, # Found a '\' inside a quote. Treat the next char as-is. |
| 460 NEED_COMMA_OR_DOT, # Right after the closing quote of an argument. Could be |
| 461 # a series of 3 dots or a comma. |
| 462 NEED_SPACE, # Right after a comma. |
| 463 NEED_DOT_2, # Found a dot, need a second one. |
| 464 NEED_DOT_3, # Found second dot, need a third one. |
| 465 NEED_COMMA, # Found third dot, need a comma. |
| 466 ) = range(8) |
| 467 |
| 468 state = NEED_QUOTE |
| 469 out = [] |
| 470 for index, char in enumerate(text): |
| 471 if char == '"': |
| 472 if state == NEED_QUOTE: |
| 473 state = INSIDE_STRING |
| 474 # A new argument was found. |
| 475 out.append('') |
| 476 elif state == INSIDE_STRING: |
| 477 # The argument is now closed. |
| 478 state = NEED_COMMA_OR_DOT |
| 479 elif state == ESCAPED: |
| 480 out[-1] += char |
| 481 state = INSIDE_STRING |
| 482 else: |
| 483 raise ValueError( |
| 484 'Can\'t process char at column %d for: %r' % (index, text), |
| 485 index, |
| 486 text) |
| 487 elif char == ',': |
| 488 if state in (NEED_COMMA_OR_DOT, NEED_COMMA): |
| 489 state = NEED_SPACE |
| 490 elif state == INSIDE_STRING: |
| 491 out[-1] += char |
| 492 elif state == ESCAPED: |
| 493 out[-1] += char |
| 494 state = INSIDE_STRING |
| 495 else: |
| 496 raise ValueError( |
| 497 'Can\'t process char at column %d for: %r' % (index, text), |
| 498 index, |
| 499 text) |
| 500 elif char == ' ': |
| 501 if state == NEED_SPACE: |
| 502 state = NEED_QUOTE |
| 503 elif state == INSIDE_STRING: |
| 504 out[-1] += char |
| 505 elif state == ESCAPED: |
| 506 out[-1] += char |
| 507 state = INSIDE_STRING |
| 508 else: |
| 509 raise ValueError( |
| 510 'Can\'t process char at column %d for: %r' % (index, text), |
| 511 index, |
| 512 text) |
| 513 elif char == '.': |
| 514 if state == NEED_COMMA_OR_DOT: |
| 515 # The string is incomplete; this means the strace -s flag should be |
| 516 # increased. |
| 517 state = NEED_DOT_2 |
| 518 elif state == NEED_DOT_2: |
| 519 state = NEED_DOT_3 |
| 520 elif state == NEED_DOT_3: |
| 521 state = NEED_COMMA |
| 522 elif state == INSIDE_STRING: |
| 523 out[-1] += char |
| 524 elif state == ESCAPED: |
| 525 out[-1] += char |
| 526 state = INSIDE_STRING |
| 527 else: |
| 528 raise ValueError( |
| 529 'Can\'t process char at column %d for: %r' % (index, text), |
| 530 index, |
| 531 text) |
| 532 elif char == '\\': |
| 533 if state == ESCAPED: |
| 534 out[-1] += char |
| 535 state = INSIDE_STRING |
| 536 elif state == INSIDE_STRING: |
| 537 state = ESCAPED |
| 538 else: |
| 539 raise ValueError( |
| 540 'Can\'t process char at column %d for: %r' % (index, text), |
| 541 index, |
| 542 text) |
| 543 else: |
| 544 if state == INSIDE_STRING: |
| 545 out[-1] += char |
| 546 else: |
| 547 raise ValueError( |
| 548 'Can\'t process char at column %d for: %r' % (index, text), |
| 549 index, |
| 550 text) |
| 551 if state not in (NEED_COMMA, NEED_COMMA_OR_DOT): |
| 552 raise ValueError( |
| 553 'String is incorrectly terminated: %r' % text, |
| 554 text) |
| 555 return out |
| 556 |
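Editor's examples, following the docstring above:
# strace_process_quoted_arguments('"foo", "bar"') -> ['foo', 'bar']
# strace_process_quoted_arguments('"/usr/lo"...') -> ['/usr/lo']   (string truncated by strace -s)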
| 557 |
| 558 def read_json(filepath): |
| 559 with open(filepath, 'r') as f: |
| 560 return json.load(f) |
| 561 |
| 562 |
| 563 def write_json(filepath_or_handle, data, dense): |
| 564 """Writes data into filepath or file handle encoded as json. |
| 565 |
| 566 If dense is True, the json is packed. Otherwise, it is human readable. |
| 567 """ |
| 568 if hasattr(filepath_or_handle, 'write'): |
| 569 if dense: |
| 570 filepath_or_handle.write( |
| 571 json.dumps(data, sort_keys=True, separators=(',',':'))) |
| 572 else: |
| 573 filepath_or_handle.write(json.dumps(data, sort_keys=True, indent=2)) |
| 574 else: |
| 575 with open(filepath_or_handle, 'wb') as f: |
| 576 if dense: |
| 577 json.dump(data, f, sort_keys=True, separators=(',',':')) |
| 578 else: |
| 579 json.dump(data, f, sort_keys=True, indent=2) |
| 580 |
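A small round-trip sketch (editor's example; the /tmp path is illustrative):
# write_json('/tmp/example.json', {'traces': []}, False)   # human readable, indented
# write_json('/tmp/example.json', {'traces': []}, True)    # dense/packed
# read_json('/tmp/example.json')  -> {u'traces': []}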
| 581 |
| 582 class Results(object): |
| 583 """Results of a trace session.""" |
| 584 |
| 585 class _TouchedObject(object): |
| 586 """Something, a file or a directory, that was accessed.""" |
| 587 def __init__(self, root, path, tainted, size, nb_files): |
| 588 logging.debug( |
| 589 '%s(%s, %s, %s, %s, %s)' % |
| 590 (self.__class__.__name__, root, path, tainted, size, nb_files)) |
| 591 self.root = root |
| 592 self.path = path |
| 593 self.tainted = tainted |
| 594 self.nb_files = nb_files |
| 595 # Can be used as a cache or a default value, depending on context. |
| 596 self._size = size |
| 597 # These are cache only. |
| 598 self._real_path = None |
| 599 |
| 600 # Check internal consistency. |
| 601 assert path, path |
| 602 assert tainted or bool(root) != bool(isabs(path)), (root, path) |
| 603 assert tainted or ( |
| 604 not os.path.exists(self.full_path) or |
| 605 (self.full_path == get_native_path_case(self.full_path))), ( |
| 606 tainted, self.full_path, get_native_path_case(self.full_path)) |
| 607 |
| 608 @property |
| 609 def existent(self): |
| 610 return self.size != -1 |
| 611 |
| 612 @property |
| 613 def full_path(self): |
| 614 if self.root: |
| 615 return os.path.join(self.root, self.path) |
| 616 return self.path |
| 617 |
| 618 @property |
| 619 def real_path(self): |
| 620 """Returns the path with symlinks resolved.""" |
| 621 if not self._real_path: |
| 622 self._real_path = os.path.realpath(self.full_path) |
| 623 return self._real_path |
| 624 |
| 625 @property |
| 626 def size(self): |
| 627 """File's size. -1 is not existent.""" |
| 628 if self._size is None and not self.tainted: |
| 629 try: |
| 630 self._size = os.stat(self.full_path).st_size |
| 631 except OSError: |
| 632 self._size = -1 |
| 633 return self._size |
| 634 |
| 635 def flatten(self): |
| 636 """Returns a dict representing this object. |
| 637 |
| 638 A 'size' of 0 means the file was only touched and not read. |
| 639 """ |
| 640 return { |
| 641 'path': self.path, |
| 642 'size': self.size, |
| 643 } |
| 644 |
| 645 def replace_variables(self, variables): |
| 646 """Replaces the root of this File with one of the variables if it matches. |
| 647 |
| 648 If a variable replacement occurs, the cloned object becomes tainted. |
| 649 """ |
| 650 for variable, root_path in variables.iteritems(): |
| 651 if self.path.startswith(root_path): |
| 652 return self._clone( |
| 653 self.root, variable + self.path[len(root_path):], True) |
| 654 # No need to clone, return self. |
| 655 return self |
| 656 |
| 657 def strip_root(self, root): |
| 658 """Returns a clone of itself with 'root' stripped off.""" |
| 659 # Check internal consistency. |
| 660 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root |
| 661 if not self.full_path.startswith(root): |
| 662 # Now try to resolve the symlinks to see if it can be reached this way. |
| 663 # Only try *after* trying without resolving symlink. |
| 664 if not self.real_path.startswith(root): |
| 665 return None |
| 666 path = self.real_path |
| 667 else: |
| 668 path = self.full_path |
| 669 return self._clone(root, path[len(root):], self.tainted) |
| 670 |
| 671 def _clone(self, new_root, new_path, tainted): |
| 672 raise NotImplementedError(self.__class__.__name__) |
| 673 |
| 674 class File(_TouchedObject): |
| 675 """A file that was accessed. May not be present anymore. |
| 676 |
| 677 If tainted is true, it means it is not a real path anymore because a variable |
| 678 replacement occurred. |
| 679 |
| 680 If touched_only is True, the file was probed for existence and does exist, |
| 681 but it was never _opened_. In other words, a touched_only file must have |
| 682 existed at the time of the probe. |
| 683 """ |
| 684 def __init__(self, root, path, tainted, size): |
| 685 super(Results.File, self).__init__(root, path, tainted, size, 1) |
| 686 |
| 687 def _clone(self, new_root, new_path, tainted): |
| 688 """Clones itself keeping meta-data.""" |
| 689 # Keep the self.size and self._real_path caches for performance reasons. It |
| 690 # is also important when the file becomes tainted (with a variable instead |
| 691 # of the real path) since self.path is not an on-disk path anymore so |
| 692 # out._size cannot be updated. |
| 693 out = self.__class__(new_root, new_path, tainted, self.size) |
| 694 out._real_path = self._real_path |
| 695 return out |
| 696 |
| 697 class Directory(_TouchedObject): |
| 698 """A directory of files. Must exist.""" |
| 699 def __init__(self, root, path, tainted, size, nb_files): |
| 700 """path='.' is a valid value and must be handled appropriately.""" |
| 701 assert not path.endswith(os.path.sep), path |
| 702 super(Results.Directory, self).__init__( |
| 703 root, path + os.path.sep, tainted, size, nb_files) |
| 704 # For a Directory instance, self.size is not a cache, it's an actual value |
| 705 # that is never modified and represents the total size of the files |
| 706 # contained in this directory. It is possible that the directory is empty |
| 707 # so that size == 0; this happens if there's only an invalid symlink in |
| 708 # it. |
| 709 |
| 710 def flatten(self): |
| 711 out = super(Results.Directory, self).flatten() |
| 712 out['nb_files'] = self.nb_files |
| 713 return out |
| 714 |
| 715 def _clone(self, new_root, new_path, tainted): |
| 716 """Clones itself keeping meta-data.""" |
| 717 out = self.__class__( |
| 718 new_root, |
| 719 new_path.rstrip(os.path.sep), |
| 720 tainted, |
| 721 self.size, |
| 722 self.nb_files) |
| 723 out._real_path = self._real_path |
| 724 return out |
| 725 |
| 726 class Process(object): |
| 727 """A process that was traced. |
| 728 |
| 729 Contains references to the files accessed by this process and its children. |
| 730 """ |
| 731 def __init__(self, pid, files, executable, command, initial_cwd, children): |
| 732 logging.debug('Process(%s, %d, ...)' % (pid, len(files))) |
| 733 self.pid = pid |
| 734 self.files = sorted(files, key=lambda x: x.path) |
| 735 self.children = children |
| 736 self.executable = executable |
| 737 self.command = command |
| 738 self.initial_cwd = initial_cwd |
| 739 |
| 740 # Check internal consistency. |
| 741 assert len(set(f.path for f in self.files)) == len(self.files), sorted( |
| 742 f.path for f in self.files) |
| 743 assert isinstance(self.children, list) |
| 744 assert isinstance(self.files, list) |
| 745 |
| 746 @property |
| 747 def all(self): |
| 748 for child in self.children: |
| 749 for i in child.all: |
| 750 yield i |
| 751 yield self |
| 752 |
| 753 def flatten(self): |
| 754 return { |
| 755 'children': [c.flatten() for c in self.children], |
| 756 'command': self.command, |
| 757 'executable': self.executable, |
| 758 'files': [f.flatten() for f in self.files], |
| 759 'initial_cwd': self.initial_cwd, |
| 760 'pid': self.pid, |
| 761 } |
| 762 |
| 763 def strip_root(self, root): |
| 764 assert isabs(root) and root.endswith(os.path.sep), root |
| 765 # Loads the files after since they are constructed as objects. |
| 766 out = self.__class__( |
| 767 self.pid, |
| 768 filter(None, (f.strip_root(root) for f in self.files)), |
| 769 self.executable, |
| 770 self.command, |
| 771 self.initial_cwd, |
| 772 [c.strip_root(root) for c in self.children]) |
| 773 logging.debug( |
| 774 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) |
| 775 return out |
| 776 |
| 777 def __init__(self, process): |
| 778 self.process = process |
| 779 # Cache. |
| 780 self._files = None |
| 781 |
| 782 def flatten(self): |
| 783 return { |
| 784 'root': self.process.flatten(), |
| 785 } |
| 786 |
| 787 @property |
| 788 def files(self): |
| 789 if self._files is None: |
| 790 self._files = sorted( |
| 791 sum((p.files for p in self.process.all), []), |
| 792 key=lambda x: x.path) |
| 793 return self._files |
| 794 |
| 795 @property |
| 796 def existent(self): |
| 797 return [f for f in self.files if f.existent] |
| 798 |
| 799 @property |
| 800 def non_existent(self): |
| 801 return [f for f in self.files if not f.existent] |
| 802 |
| 803 def strip_root(self, root): |
| 804 """Returns a clone with all the files outside the directory |root| removed |
| 805 and converts all the path to be relative paths. |
| 806 """ |
| 807 # Resolve any symlink |
| 808 root = os.path.realpath(root) |
| 809 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep |
| 810 logging.debug('strip_root(%s)' % root) |
| 811 return Results(self.process.strip_root(root)) |
| 812 |
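A minimal sketch of the data model above (editor's example; all values are made up):
# proc = Results.Process(
#     pid=42, files=[], executable='/bin/true', command=['/bin/true'],
#     initial_cwd='/', children=[])
# results = Results(proc)
# results.flatten()['root']['pid']   -> 42
# results.flatten()['root']['files'] -> []
# results.existent                   -> []   (no files were recorded)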
| 813 |
| 814 class ApiBase(object): |
| 815 """OS-agnostic API to trace a process and its children.""" |
| 816 class Context(object): |
| 817 """Processes one log line at a time and keeps the list of traced processes. |
| 818 |
| 819 The parsing is complicated by the fact that logs are emitted out of order for |
| 820 strace but in order for dtrace and logman. In addition, on Windows it is |
| 821 very frequent that process ids are reused, so a flat list cannot be used. But |
| 822 at the same time, it is impossible to faithfully construct a graph when the |
| 823 logs are processed out of order. So both a tree and a flat mapping are used: |
| 824 the tree is the real process tree, while the flat mapping stores the last |
| 825 valid process for the corresponding process id. For the strace case, the |
| 826 tree's head is guessed at the last moment. |
| 827 """ |
| 828 class Process(object): |
| 829 """Keeps context for one traced child process. |
| 830 |
| 831 Logs all the files this process touched. Ignores directories. |
| 832 """ |
| 833 def __init__(self, blacklist, pid, initial_cwd): |
| 834 # Check internal consistency. |
| 835 assert isinstance(pid, int), repr(pid) |
| 836 self.pid = pid |
| 837 # children are Process instances. |
| 838 self.children = [] |
| 839 self.initial_cwd = initial_cwd |
| 840 self.cwd = None |
| 841 self.files = set() |
| 842 self.only_touched = set() |
| 843 self.executable = None |
| 844 self.command = None |
| 845 self._blacklist = blacklist |
| 846 |
| 847 def to_results_process(self): |
| 848 """Resolves file case sensitivity and or late-bound strings.""" |
| 849 # When resolving files, it's normal to get dupe because a file could be |
| 850 # opened multiple times with different case. Resolve the deduplication |
| 851 # here. |
| 852 def fix_path(x): |
| 853 """Returns the native file path case. |
| 854 |
| 855 Converts late-bound strings. |
| 856 """ |
| 857 if not x: |
| 858 # Do not convert None instance to 'None'. |
| 859 return x |
| 860 # TODO(maruel): Do not upconvert to unicode here, on linux we don't |
| 861 # know the file path encoding so they must be treated as bytes. |
| 862 x = unicode(x) |
| 863 if os.path.isabs(x): |
| 864 # If the path is not absolute, which tends to happen occasionally on |
| 865 # Windows, it is not possible to get the native path case so ignore |
| 866 # that trace. It mostly happens for 'executable' value. |
| 867 x = get_native_path_case(x) |
| 868 return x |
| 869 |
| 870 def fix_and_blacklist_path(x): |
| 871 x = fix_path(x) |
| 872 if not x: |
| 873 return |
| 874 # The blacklist needs to be reapplied, since path casing could |
| 875 # influence blacklisting. |
| 876 if self._blacklist(x): |
| 877 return |
| 878 return x |
| 879 |
| 880 # Filters out directories. Some may have passed through. |
| 881 files = set(f for f in map(fix_and_blacklist_path, self.files) if f) |
| 882 only_touched = set( |
| 883 f for f in map(fix_and_blacklist_path, self.only_touched) if f) |
| 884 only_touched -= files |
| 885 |
| 886 files = [ |
| 887 Results.File(None, f, False, None) for f in files |
| 888 if not os.path.isdir(f) |
| 889 ] |
| 890 # Using 0 as size means the file's content is ignored since the file was |
| 891 # never opened for I/O. |
| 892 files.extend( |
| 893 Results.File(None, f, False, 0) for f in only_touched |
| 894 if not os.path.isdir(f) |
| 895 ) |
| 896 return Results.Process( |
| 897 self.pid, |
| 898 files, |
| 899 fix_path(self.executable), |
| 900 self.command, |
| 901 fix_path(self.initial_cwd), |
| 902 [c.to_results_process() for c in self.children]) |
| 903 |
| 904 def add_file(self, filepath, touch_only): |
| 905 """Adds a file if it passes the blacklist.""" |
| 906 if self._blacklist(unicode(filepath)): |
| 907 return |
| 908 logging.debug('add_file(%d, %s, %s)' % (self.pid, filepath, touch_only)) |
| 909 # Note that filepath and not unicode(filepath) is added. It is because |
| 910 # filepath could be something other than a string, like a RelativePath |
| 911 # instance for dtrace logs. |
| 912 if touch_only: |
| 913 self.only_touched.add(filepath) |
| 914 else: |
| 915 self.files.add(filepath) |
| 916 |
| 917 def __init__(self, blacklist): |
| 918 self.blacklist = blacklist |
| 919 # Initial process. |
| 920 self.root_process = None |
| 921 # dict to accelerate process lookup, so we don't have to walk the whole graph |
| 922 # each time. |
| 923 self._process_lookup = {} |
| 924 |
| 925 class Tracer(object): |
| 926 """During it's lifetime, the tracing subsystem is enabled.""" |
| 927 def __init__(self, logname): |
| 928 self._logname = logname |
| 929 self._lock = threading.Lock() |
| 930 self._traces = [] |
| 931 self._initialized = True |
| 932 |
| 933 def trace(self, cmd, cwd, tracename, output): |
| 934 """Runs the OS-specific trace program on an executable. |
| 935 |
| 936 Arguments: |
| 937 - cmd: The command (a list) to run. |
| 938 - cwd: Current directory to start the child process in. |
| 939 - tracename: Name of the trace in the logname file. |
| 940 - output: If False, redirects output to PIPEs. |
| 941 |
| 942 Returns a tuple (resultcode, output) and updates the internal trace |
| 943 entries. |
| 944 """ |
| 945 # The implementation adds an item to self._traces. |
| 946 raise NotImplementedError(self.__class__.__name__) |
| 947 |
| 948 def close(self, _timeout=None): |
| 949 """Saves the meta-data in the logname file. |
| 950 |
| 951 For kernel-based tracing, stops the tracing subsystem. |
| 952 |
| 953 Must not be used manually when using 'with' construct. |
| 954 """ |
| 955 with self._lock: |
| 956 assert self._initialized |
| 957 try: |
| 958 data = { |
| 959 'traces': self._traces, |
| 960 } |
| 961 write_json(self._logname, data, False) |
| 962 finally: |
| 963 self._initialized = False |
| 964 |
| 965 def post_process_log(self): |
| 966 """Post-processes the log so it becomes faster to load afterward. |
| 967 |
| 968 Must not be used manually when using 'with' construct. |
| 969 """ |
| 970 assert not self._initialized, 'Must stop tracing first.' |
| 971 |
| 972 def __enter__(self): |
| 973 """Enables 'with' statement.""" |
| 974 return self |
| 975 |
| 976 def __exit__(self, exc_type, exc_value, traceback): |
| 977 """Enables 'with' statement.""" |
| 978 self.close() |
| 979 # If an exception was thrown, do not process logs. |
| 980 if not exc_type: |
| 981 self.post_process_log() |
| 982 |
| 983 def get_tracer(self, logname): |
| 984 """Returns an ApiBase.Tracer instance. |
| 985 |
| 986 Initializes the tracing subsystem, which is a requirement for kernel-based |
| 987 tracers. Only one tracer instance should be live at a time! |
| 988 |
| 989 logname is the filepath to the json file that will contain the meta-data |
| 990 about the logs. |
| 991 """ |
| 992 return self.Tracer(logname) |
| 993 |
| 994 @staticmethod |
| 995 def clean_trace(logname): |
| 996 """Deletes an old log.""" |
| 997 raise NotImplementedError() |
| 998 |
| 999 @classmethod |
| 1000 def parse_log(cls, logname, blacklist): |
| 1001 """Processes trace logs and returns the files opened and the files that do |
| 1002 not exist. |
| 1003 |
| 1004 It does not track directories. |
| 1005 |
| 1006 Most of the time, files that do not exist are temporary test files that |
| 1007 should be put in /tmp instead. See http://crbug.com/116251. |
| 1008 |
| 1009 Returns a list of dicts with keys: |
| 1010 - results: A Results instance. |
| 1011 - trace: The corresponding tracename parameter provided to |
| 1012 get_tracer().trace(). |
| 1013 - output: Output gathered during execution, if get_tracer().trace(..., |
| 1014 output=False) was used. |
| 1015 """ |
| 1016 raise NotImplementedError(cls.__name__) |
| 1017 |
| 1018 |
| 1019 class Strace(ApiBase): |
| 1020 """strace implies linux.""" |
| 1021 class Context(ApiBase.Context): |
| 1022 """Processes a strace log line and keeps the list of existent and non |
| 1023 existent files accessed. |
| 1024 |
| 1025 Ignores directories. |
| 1026 |
| 1027 Uses late-binding to process the cwd of each process. The problem is that |
| 1028 strace generates one log file per process it traced but doesn't give any |
| 1029 information about which process was started when and by whom, so we don't |
| 1030 even know which process is the initial one. Therefore the logs are processed |
| 1031 out of order and late binding with RelativePath is used to deduce the |
| 1032 initial directory of each process once all the logs are parsed. |
| 1033 """ |
| 1034 class Process(ApiBase.Context.Process): |
| 1035 """Represents the state of a process. |
| 1036 |
| 1037 Contains all the information retrieved from the pid-specific log. |
| 1038 """ |
| 1039 # Function names are using ([a-z_0-9]+) |
| 1040 # This is the most common format. function(args) = result |
| 1041 RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.+?)\)\s+= (.+)$') |
| 1042 # An interrupted function call, only grab the minimal header. |
| 1043 RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$') |
| 1044 # A resumed function call. |
| 1045 RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$') |
| 1046 # A process received a signal. |
| 1047 RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---') |
| 1048 # A process didn't handle a signal. Ignore any junk appearing before it, |
| 1049 # because the process was forcibly killed so it won't open any new files. |
| 1050 RE_KILLED = re.compile( |
| 1051 r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$') |
| 1052 # The process has exited. |
| 1053 RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+') |
| 1054 # A call was canceled. Ignore any prefix. |
| 1055 RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$') |
| 1056 # Happens when strace fails to even get the function name. |
| 1057 UNNAMED_FUNCTION = '????' |
| 1058 |
| 1059 # Corner-case in python, a class member function decorator must not be |
| 1060 # @staticmethod. |
| 1061 def parse_args(regexp, expect_zero): # pylint: disable=E0213 |
| 1062 """Automatically convert the str 'args' into a list of processed |
| 1063 arguments. |
| 1064 |
| 1065 Arguments: |
| 1066 - regexp is used to parse args. |
| 1067 - expect_zero: one of True, False or None. |
| 1068 - True: will check for result.startswith('0') first and will ignore |
| 1069 the trace line completely otherwise. This is important because for |
| 1070 many functions, the regexp will not match if the call failed. |
| 1071 - False: will check for not result.startswith(('?', '-1')) for the |
| 1072 same reason as with True. |
| 1073 - None: ignore result. |
| 1074 """ |
| 1075 def meta_hook(function): |
| 1076 assert function.__name__.startswith('handle_') |
| 1077 def hook(self, args, result): |
| 1078 if expect_zero is True and not result.startswith('0'): |
| 1079 return |
| 1080 if expect_zero is False and result.startswith(('?', '-1')): |
| 1081 return |
| 1082 match = re.match(regexp, args) |
| 1083 if not match: |
| 1084 raise TracingFailure( |
| 1085 'Failed to parse %s(%s) = %s' % |
| 1086 (function.__name__[len('handle_'):], args, result), |
| 1087 None, None, None) |
| 1088 return function(self, match.groups(), result) |
| 1089 return hook |
| 1090 return meta_hook |
| 1091 |
| 1092 class RelativePath(object): |
| 1093 """A late-bound relative path.""" |
| 1094 def __init__(self, parent, value): |
| 1095 self.parent = parent |
| 1096 self.value = value |
| 1097 |
| 1098 def render(self): |
| 1099 """Returns the current directory this instance is representing. |
| 1100 |
| 1101 This function is used to return the late-bound value. |
| 1102 """ |
| 1103 if self.value and self.value.startswith(u'/'): |
| 1104 # An absolute path. |
| 1105 return self.value |
| 1106 parent = self.parent.render() if self.parent else u'<None>' |
| 1107 if self.value: |
| 1108 return os.path.normpath(os.path.join(parent, self.value)) |
| 1109 return parent |
| 1110 |
| 1111 def __unicode__(self): |
| 1112 """Acts as a string whenever needed.""" |
| 1113 return unicode(self.render()) |
| 1114 |
| 1115 def __str__(self): |
| 1116 """Acts as a string whenever needed.""" |
| 1117 return str(self.render()) |
| 1118 |
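Editor's sketch of the late-binding behavior (the fake parent below stands in for a Context or Process, both of which expose render()):
# class FakeRoot(object):
#   def render(self):
#     return u'/home/user/src'
# rel = Strace.Context.Process.RelativePath(FakeRoot(), u'out/Release')
# unicode(rel)  -> u'/home/user/src/out/Release'
# absolute = Strace.Context.Process.RelativePath(None, u'/usr/lib')
# unicode(absolute)  -> u'/usr/lib'   (absolute values ignore the parent)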
| 1119 def __init__(self, root, pid): |
| 1120 """Keeps enough information to be able to guess the original process |
| 1121 root. |
| 1122 |
| 1123 strace doesn't store which process was the initial process. So more |
| 1124 information needs to be kept so the graph can be reconstructed from the |
| 1125 flat map. |
| 1126 """ |
| 1127 logging.info('%s(%d)' % (self.__class__.__name__, pid)) |
| 1128 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None) |
| 1129 assert isinstance(root, ApiBase.Context) |
| 1130 self._root = weakref.ref(root) |
| 1131 # The dict key is the function name of the pending call, like 'open' |
| 1132 # or 'execve'. |
| 1133 self._pending_calls = {} |
| 1134 self._line_number = 0 |
| 1135 # Current directory when the process started. |
| 1136 self.initial_cwd = self.RelativePath(self._root(), None) |
| 1137 self.parentid = None |
| 1138 |
| 1139 def get_cwd(self): |
| 1140 """Returns the best known value of cwd.""" |
| 1141 return self.cwd or self.initial_cwd |
| 1142 |
| 1143 def render(self): |
| 1144 """Returns the string value of the RelativePath() object. |
| 1145 |
| 1146 Used by RelativePath. Returns the initial directory and not the |
| 1147 current one, since the current directory 'cwd' is only valid for a limited time. |
| 1148 |
| 1149 The validity is only guaranteed once all the logs are processed. |
| 1150 """ |
| 1151 return self.initial_cwd.render() |
| 1152 |
| 1153 def on_line(self, line): |
| 1154 self._line_number += 1 |
| 1155 if self.RE_SIGNAL.match(line): |
| 1156 # Ignore signals. |
| 1157 return |
| 1158 |
| 1159 try: |
| 1160 match = self.RE_KILLED.match(line) |
| 1161 if match: |
| 1162 # Converts a '+++ killed by Foo +++' trace into an exit_group(). |
| 1163 self.handle_exit_group(match.group(1), None) |
| 1164 return |
| 1165 |
| 1166 match = self.RE_PROCESS_EXITED.match(line) |
| 1167 if match: |
| 1168 # Converts a '+++ exited with 1 +++' trace into an exit_group() |
| 1169 self.handle_exit_group(match.group(1), None) |
| 1170 return |
| 1171 |
| 1172 match = self.RE_UNFINISHED.match(line) |
| 1173 if match: |
| 1174 if match.group(1) in self._pending_calls: |
| 1175 raise TracingFailure( |
| 1176 'Found two unfinished calls for the same function', |
| 1177 None, None, None, |
| 1178 self._pending_calls) |
| 1179 self._pending_calls[match.group(1)] = ( |
| 1180 match.group(1) + match.group(2)) |
| 1181 return |
| 1182 |
| 1183 match = self.RE_UNAVAILABLE.match(line) |
| 1184 if match: |
| 1185 # This usually means a process was killed and a pending call was |
| 1186 # canceled. |
| 1187 # TODO(maruel): Look up the last exit_group() trace just above and |
| 1188 # make sure any self._pending_calls[anything] is properly flushed. |
| 1189 return |
| 1190 |
| 1191 match = self.RE_RESUMED.match(line) |
| 1192 if match: |
| 1193 if match.group(1) not in self._pending_calls: |
| 1194 raise TracingFailure( |
| 1195 'Found a resumed call that was not logged as unfinished', |
| 1196 None, None, None, |
| 1197 self._pending_calls) |
| 1198 pending = self._pending_calls.pop(match.group(1)) |
| 1199 # Reconstruct the line. |
| 1200 line = pending + match.group(2) |
| 1201 |
| 1202 match = self.RE_HEADER.match(line) |
| 1203 if not match: |
| 1204 raise TracingFailure( |
| 1205 'Found an invalid line: %s' % line, |
| 1206 None, None, None) |
| 1207 if match.group(1) == self.UNNAMED_FUNCTION: |
| 1208 return |
| 1209 |
| 1210 # It's a valid line, handle it. |
| 1211 handler = getattr(self, 'handle_%s' % match.group(1), None) |
| 1212 if not handler: |
| 1213 self._handle_unknown(match.group(1), match.group(2), match.group(3)) |
| 1214 return handler(match.group(2), match.group(3)) |
| 1215 except TracingFailure, e: |
| 1216 # Hack in the values since the handler could be a static function. |
| 1217 e.pid = self.pid |
| 1218 e.line = line |
| 1219 e.line_number = self._line_number |
| 1220 # Re-raise the modified exception. |
| 1221 raise |
| 1222 except (KeyError, NotImplementedError, ValueError), e: |
| 1223 raise TracingFailure( |
| 1224 'Trace generated a %s exception: %s' % ( |
| 1225 e.__class__.__name__, str(e)), |
| 1226 self.pid, |
| 1227 self._line_number, |
| 1228 line, |
| 1229 e) |
| 1230 |
| 1231 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True) |
| 1232 def handle_access(self, args, _result): |
| 1233 self._handle_file(args[0], True) |
| 1234 |
| 1235 @parse_args(r'^\"(.+?)\"$', True) |
| 1236 def handle_chdir(self, args, _result): |
| 1237 """Updates cwd.""" |
| 1238 self.cwd = self.RelativePath(self, args[0]) |
| 1239 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd)) |
| 1240 |
| 1241 def handle_clone(self, _args, result): |
| 1242 """Transfers cwd.""" |
| 1243 if result.startswith(('?', '-1')): |
| 1244 # The call failed. |
| 1245 return |
| 1246 # Update the other process right away. |
| 1247 childpid = int(result) |
| 1248 child = self._root().get_or_set_proc(childpid) |
| 1249 if child.parentid is not None or childpid in self.children: |
| 1250 raise TracingFailure( |
| 1251 'Found internal inconsistency in process lifetime detection ' |
| 1252 'during a clone() call', |
| 1253 None, None, None) |
| 1254 |
| 1255 # Copy the cwd object. |
| 1256 child.initial_cwd = self.get_cwd() |
| 1257 child.parentid = self.pid |
| 1258 # It is necessary because the logs are processed out of order. |
| 1259 self.children.append(child) |
| 1260 |
| 1261 def handle_close(self, _args, _result): |
| 1262 pass |
| 1263 |
| 1264 def handle_chmod(self, _args, _result): |
| 1265 pass |
| 1266 |
| 1267 def handle_creat(self, _args, _result): |
| 1268 # Ignore files created, since they didn't need to exist. |
| 1269 pass |
| 1270 |
| 1271 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True) |
| 1272 def handle_execve(self, args, _result): |
| 1273 # Even if in practice execve() doesn't return when it succeeds, strace |
| 1274 # still prints '0' as the result. |
| 1275 filepath = args[0] |
| 1276 self._handle_file(filepath, False) |
| 1277 self.executable = self.RelativePath(self.get_cwd(), filepath) |
| 1278 self.command = strace_process_quoted_arguments(args[1]) |
| 1279 |
| 1280 def handle_exit_group(self, _args, _result): |
| 1281 """Removes cwd.""" |
| 1282 self.cwd = None |
| 1283 |
| 1284 def handle_fork(self, args, result): |
| 1285 self._handle_unknown('fork', args, result) |
| 1286 |
| 1287 def handle_getcwd(self, _args, _result): |
| 1288 pass |
| 1289 |
| 1290 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True) |
| 1291 def handle_link(self, args, _result): |
| 1292 self._handle_file(args[0], False) |
| 1293 self._handle_file(args[1], False) |
| 1294 |
| 1295 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True) |
| 1296 def handle_lstat(self, args, _result): |
| 1297 self._handle_file(args[0], True) |
| 1298 |
| 1299 def handle_mkdir(self, _args, _result): |
| 1300 pass |
| 1301 |
| 1302 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False) |
| 1303 def handle_open(self, args, _result): |
| 1304 if 'O_DIRECTORY' in args[1]: |
| 1305 return |
| 1306 self._handle_file(args[0], False) |
| 1307 |
| 1308 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False) |
| 1309 def handle_openat(self, args, _result): |
| 1310 if 'O_DIRECTORY' in args[2]: |
| 1311 return |
| 1312 if args[0] == 'AT_FDCWD': |
| 1313 self._handle_file(args[1], False) |
| 1314 else: |
| 1315 # TODO(maruel): Implement relative open if necessary instead of the |
| 1316 # AT_FDCWD flag, let's hope not since this means tracking all active |
| 1317 # directory handles. |
| 1318 raise Exception('Relative open via openat not implemented.') |
| 1319 |
| 1320 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False) |
| 1321 def handle_readlink(self, args, _result): |
| 1322 self._handle_file(args[0], False) |
| 1323 |
| 1324 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True) |
| 1325 def handle_rename(self, args, _result): |
| 1326 self._handle_file(args[0], False) |
| 1327 self._handle_file(args[1], False) |
| 1328 |
| 1329 def handle_rmdir(self, _args, _result): |
| 1330 pass |
| 1331 |
| 1332 def handle_setxattr(self, _args, _result): |
| 1333 pass |
| 1334 |
| 1335 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True) |
| 1336 def handle_stat(self, args, _result): |
| 1337 self._handle_file(args[0], True) |
| 1338 |
| 1339 def handle_symlink(self, _args, _result): |
| 1340 pass |
| 1341 |
| 1342 def handle_unlink(self, _args, _result): |
| 1343 # In theory, the file had to be created anyway. |
| 1344 pass |
| 1345 |
| 1346 def handle_statfs(self, _args, _result): |
| 1347 pass |
| 1348 |
| 1349 def handle_vfork(self, args, result): |
| 1350 self._handle_unknown('vfork', args, result) |
| 1351 |
| 1352 @staticmethod |
| 1353 def _handle_unknown(function, args, result): |
| 1354 raise TracingFailure( |
| 1355 'Unexpected/unimplemented trace %s(%s)= %s' % |
| 1356 (function, args, result), |
| 1357 None, None, None) |
| 1358 |
| 1359 def _handle_file(self, filepath, touch_only): |
| 1360 filepath = self.RelativePath(self.get_cwd(), filepath) |
| 1361 #assert not touch_only, unicode(filepath) |
| 1362 self.add_file(filepath, touch_only) |
| 1363 |
| 1364 def __init__(self, blacklist, initial_cwd): |
| 1365 super(Strace.Context, self).__init__(blacklist) |
| 1366 self.initial_cwd = initial_cwd |
| 1367 |
| 1368 def render(self): |
| 1369 """Returns the string value of the initial cwd of the root process. |
| 1370 |
| 1371 Used by RelativePath. |
| 1372 """ |
| 1373 return self.initial_cwd |
| 1374 |
| 1375 def on_line(self, pid, line): |
| 1376 """Transfers control into the Process.on_line() function.""" |
| 1377 self.get_or_set_proc(pid).on_line(line.strip()) |
| 1378 |
| 1379 def to_results(self): |
| 1380 """Finds back the root process and verify consistency.""" |
| 1381 # TODO(maruel): Absolutely unnecessary, fix me. |
| 1382 root = [p for p in self._process_lookup.itervalues() if not p.parentid] |
| 1383 if len(root) != 1: |
| 1384 raise TracingFailure( |
| 1385 'Found internal inconsistency in process lifetime detection ' |
| 1386 'while finding the root process', |
| 1387 None, |
| 1388 None, |
| 1389 None, |
| 1390 sorted(p.pid for p in root)) |
| 1391 self.root_process = root[0] |
| 1392 process = self.root_process.to_results_process() |
| 1393 if sorted(self._process_lookup) != sorted(p.pid for p in process.all): |
| 1394 raise TracingFailure( |
| 1395 'Found internal inconsistency in process lifetime detection ' |
| 1396 'while looking for len(tree) == len(list)', |
| 1397 None, |
| 1398 None, |
| 1399 None, |
| 1400 sorted(self._process_lookup), |
| 1401 sorted(p.pid for p in process.all)) |
| 1402 return Results(process) |
| 1403 |
| 1404 def get_or_set_proc(self, pid): |
| 1405 """Returns the Context.Process instance for this pid or creates a new one. |
| 1406 """ |
| 1407 if not pid or not isinstance(pid, int): |
| 1408 raise TracingFailure( |
| 1409 'Unexpected value for pid: %r' % pid, |
| 1410 pid, |
| 1411 None, |
| 1412 None, |
| 1413 pid) |
| 1414 if pid not in self._process_lookup: |
| 1415 self._process_lookup[pid] = self.Process(self, pid) |
| 1416 return self._process_lookup[pid] |
| 1417 |
| 1418 @classmethod |
| 1419 def traces(cls): |
| 1420 """Returns the list of all handled traces to pass this as an argument to |
| 1421 strace. |
| 1422 """ |
| 1423 prefix = 'handle_' |
| 1424 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] |
| 1425 |
| 1426 class Tracer(ApiBase.Tracer): |
| 1427 MAX_LEN = 256 |
| 1428 |
| 1429 def trace(self, cmd, cwd, tracename, output): |
| 1430 """Runs strace on an executable.""" |
| 1431 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
| 1432 assert os.path.isabs(cmd[0]), cmd[0] |
| 1433 assert os.path.isabs(cwd), cwd |
| 1434 assert os.path.normpath(cwd) == cwd, cwd |
| 1435 with self._lock: |
| 1436 if not self._initialized: |
| 1437 raise TracingFailure( |
| 1438 'Called Tracer.trace() on an uninitialized object', |
| 1439 None, None, None, tracename) |
| 1440 assert tracename not in (i['trace'] for i in self._traces) |
| 1441 stdout = stderr = None |
| 1442 if output: |
| 1443 stdout = subprocess.PIPE |
| 1444 stderr = subprocess.STDOUT |
| 1445 # Ensure all file related APIs are hooked. |
| 1446 traces = ','.join(Strace.Context.traces() + ['file']) |
| 1447 trace_cmd = [ |
| 1448 'strace', |
| 1449 '-ff', |
| 1450 '-s', '%d' % self.MAX_LEN, |
| 1451 '-e', 'trace=%s' % traces, |
| 1452 '-o', self._logname + '.' + tracename, |
| 1453 ] |
| 1454 child = subprocess.Popen( |
| 1455 trace_cmd + cmd, |
| 1456 cwd=cwd, |
| 1457 stdin=subprocess.PIPE, |
| 1458 stdout=stdout, |
| 1459 stderr=stderr) |
| 1460 out = child.communicate()[0] |
| 1461 # TODO(maruel): Walking the logs to figure out the root process would |
| 1462 # simplify parsing the logs a *lot*. |
| 1463 with self._lock: |
| 1464 assert tracename not in (i['trace'] for i in self._traces) |
| 1465 self._traces.append( |
| 1466 { |
| 1467 'cmd': cmd, |
| 1468 'cwd': cwd, |
| 1469 # The pid of strace process, not very useful. |
| 1470 'pid': child.pid, |
| 1471 'trace': tracename, |
| 1472 'output': out, |
| 1473 }) |
| 1474 return child.returncode, out |
| 1475 |
| 1476 @staticmethod |
| 1477 def clean_trace(logname): |
| 1478 if os.path.isfile(logname): |
| 1479 os.remove(logname) |
| 1480 # Also delete any pid specific file from previous traces. |
| 1481 for i in glob.iglob(logname + '.*'): |
| 1482 if i.rsplit('.', 1)[1].isdigit(): |
| 1483 os.remove(i) |
| 1484 |
| 1485 @classmethod |
| 1486 def parse_log(cls, logname, blacklist): |
| 1487 logging.info('parse_log(%s, %s)' % (logname, blacklist)) |
| 1488 data = read_json(logname) |
| 1489 out = [] |
| 1490 for item in data['traces']: |
| 1491 result = { |
| 1492 'trace': item['trace'], |
| 1493 'output': item['output'], |
| 1494 } |
| 1495 try: |
| 1496 context = cls.Context(blacklist, item['cwd']) |
| 1497 for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])): |
| 1498 pid = pidfile.rsplit('.', 1)[1] |
| 1499 if pid.isdigit(): |
| 1500 pid = int(pid) |
| 1501 # TODO(maruel): Load as utf-8 |
| 1502 for line in open(pidfile, 'rb'): |
| 1503 context.on_line(pid, line) |
| 1504 result['results'] = context.to_results() |
| 1505 except TracingFailure, e: |
| 1506 result['exception'] = e |
| 1507 out.append(result) |
| 1508 return out |
| 1509 |
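End-to-end sketch of how the Strace backend above fits together (editor's example; the paths and command are illustrative, strace must be installed, and the results depend on the local system):
# api = Strace()
# api.clean_trace('/tmp/example_trace.json')
# with api.get_tracer('/tmp/example_trace.json') as tracer:
#   returncode, output = tracer.trace(['/bin/ls', '/'], '/', 'run1', True)
# for item in Strace.parse_log('/tmp/example_trace.json', lambda path: False):
#   print item.get('exception') or item['results'].existent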
| 1510 |
| 1511 class Dtrace(ApiBase): |
| 1512 """Uses DTrace framework through dtrace. Requires root access. |
| 1513 |
| 1514 Implies Mac OSX. |
| 1515 |
| 1516 dtruss can't be used because it has compatibility issues with python. |
| 1517 |
| 1518 Also, the pid->cwd handling needs to be done manually since OSX has no way to |
| 1519 get the absolute path of the 'cwd' dtrace variable from the probe. |
| 1520 |
| 1521 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see |
| 1522 https://discussions.apple.com/thread/1980539. So resort to handling execve() |
| 1523 manually. |
| 1524 |
| 1525 errno is not printed in the log since this implementation currently only cares |
| 1526 about files that were successfully opened. |
| 1527 """ |
| 1528 class Context(ApiBase.Context): |
| 1529 # Format: index pid function(args) |
| 1530 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$') |
| 1531 |
| 1532 # Arguments parsing. |
| 1533 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') |
| 1534 RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
| 1535 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') |
| 1536 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$') |
| 1537 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$') |
| 1538 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
| 1539 |
| 1540 O_DIRECTORY = 0x100000 |
| 1541 |
| 1542 class Process(ApiBase.Context.Process): |
| 1543 def __init__(self, *args): |
| 1544 super(Dtrace.Context.Process, self).__init__(*args) |
| 1545 self.cwd = self.initial_cwd |
| 1546 |
| 1547 def __init__(self, blacklist, tracer_pid, initial_cwd): |
| 1548 logging.info( |
| 1549 '%s(%d, %s)' % (self.__class__.__name__, tracer_pid, initial_cwd)) |
| 1550 super(Dtrace.Context, self).__init__(blacklist) |
| 1551 # Process ID of the temporary script created by create_thunk(). |
| 1552 self._tracer_pid = tracer_pid |
| 1553 self._initial_cwd = initial_cwd |
| 1554 self._line_number = 0 |
| 1555 |
| 1556 def on_line(self, line): |
| 1557 self._line_number += 1 |
| 1558 match = self.RE_HEADER.match(line) |
| 1559 if not match: |
| 1560 raise TracingFailure( |
| 1561 'Found malformed line: %s' % line, |
| 1562 None, |
| 1563 self._line_number, |
| 1564 line) |
| 1565 fn = getattr( |
| 1566 self, |
| 1567 'handle_%s' % match.group(2).replace('-', '_'), |
| 1568 self._handle_ignored) |
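| # e.g. an 'open_nocancel' event dispatches to handle_open_nocancel();
| # unmatched events fall back to _handle_ignored(), which raises.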
| 1569 # It is guaranteed to succeed because of the regexp. Or at least I thought |
| 1570 # it would. |
| 1571 pid = int(match.group(1)) |
| 1572 try: |
| 1573 return fn(pid, match.group(3)) |
| 1574 except TracingFailure, e: |
| 1575 # Hack in the values since the handler could be a static function. |
| 1576 e.pid = pid |
| 1577 e.line = line |
| 1578 e.line_number = self._line_number |
| 1579 # Re-raise the modified exception. |
| 1580 raise |
| 1581 except (KeyError, NotImplementedError, ValueError), e: |
| 1582 raise TracingFailure( |
| 1583 'Trace generated a %s exception: %s' % ( |
| 1584 e.__class__.__name__, str(e)), |
| 1585 pid, |
| 1586 self._line_number, |
| 1587 line, |
| 1588 e) |
| 1589 |
| 1590 def to_results(self): |
| 1591 process = self.root_process.to_results_process() |
| 1592 # Internal consistency check.
| 1593 if sorted(self._process_lookup) != sorted(p.pid for p in process.all): |
| 1594 raise TracingFailure( |
| 1595 'Found internal inconsistency in process lifetime detection '
| 1596 'while looking for len(tree) == len(list)', |
| 1597 None, |
| 1598 None, |
| 1599 None, |
| 1600 sorted(self._process_lookup), |
| 1601 sorted(p.pid for p in process.all)) |
| 1602 return Results(process) |
| 1603 |
| 1604 def handle_dtrace_BEGIN(self, _pid, args): |
| 1605 if not self.RE_DTRACE_BEGIN.match(args): |
| 1606 raise TracingFailure( |
| 1607 'Found internal inconsistency in dtrace_BEGIN log line',
| 1608 None, None, None) |
| 1609 |
| 1610 def handle_proc_start(self, pid, args): |
| 1611 """Transfers cwd. |
| 1612 |
| 1613 The dtrace script already takes care of only tracing the processes that |
| 1614 are child of the traced processes so there is no need to verify the |
| 1615 process hierarchy. |
| 1616 """ |
| 1617 if pid in self._process_lookup: |
| 1618 raise TracingFailure( |
| 1619 'Found internal inconsistency in proc_start: %d started twice' %
| 1620 pid, |
| 1621 None, None, None) |
| 1622 match = self.RE_PROC_START.match(args) |
| 1623 if not match: |
| 1624 raise TracingFailure( |
| 1625 'Failed to parse arguments: %s' % args, |
| 1626 None, None, None) |
| 1627 ppid = int(match.group(1)) |
| 1628 if ppid == self._tracer_pid and not self.root_process: |
| 1629 proc = self.root_process = self.Process( |
| 1630 self.blacklist, pid, self._initial_cwd) |
| 1631 elif ppid in self._process_lookup: |
| 1632 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd) |
| 1633 self._process_lookup[ppid].children.append(proc) |
| 1634 else: |
| 1635 # Another process tree, ignore. |
| 1636 return |
| 1637 self._process_lookup[pid] = proc |
| 1638 logging.debug( |
| 1639 'New child: %s -> %d cwd:%s' % |
| 1640 (ppid, pid, unicode(proc.initial_cwd))) |
| 1641 |
| 1642 def handle_proc_exit(self, pid, _args): |
| 1643 """Removes cwd.""" |
| 1644 if pid in self._process_lookup: |
| 1645 # self._tracer_pid is not traced itself, nor are processes from other traces.
| 1646 self._process_lookup[pid].cwd = None |
| 1647 |
| 1648 def handle_execve(self, pid, args): |
| 1649 """Sets the process' executable. |
| 1650 |
| 1651 TODO(maruel): Read command line arguments. See |
| 1652 https://discussions.apple.com/thread/1980539 for an example. |
| 1653 https://gist.github.com/1242279 |
| 1654 |
| 1655 Will have to put the answer at http://stackoverflow.com/questions/7556249. |
| 1656 :) |
| 1657 """ |
| 1658 if pid not in self._process_lookup:
| 1659 # Another process tree, ignore. |
| 1660 return |
| 1661 match = self.RE_EXECVE.match(args) |
| 1662 if not match: |
| 1663 raise TracingFailure( |
| 1664 'Failed to parse arguments: %r' % args, |
| 1665 None, None, None) |
| 1666 proc = self._process_lookup[pid] |
| 1667 proc.executable = match.group(1) |
| 1668 proc.command = self.process_escaped_arguments(match.group(3)) |
| 1669 if int(match.group(2)) != len(proc.command): |
| 1670 raise TracingFailure( |
| 1671 'Failed to parse execve() arguments: %s' % args, |
| 1672 None, None, None) |
| 1673 |
| 1674 def handle_chdir(self, pid, args): |
| 1675 """Updates cwd.""" |
| 1676 if pid not in self._process_lookup: |
| 1677 # Another process tree, ignore. |
| 1678 return |
| 1679 cwd = self.RE_CHDIR.match(args).group(1) |
| 1680 if not cwd.startswith('/'): |
| 1681 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd) |
| 1682 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) |
| 1683 else: |
| 1684 logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) |
| 1685 cwd2 = cwd |
| 1686 self._process_lookup[pid].cwd = cwd2 |
| 1687 |
| 1688 def handle_open_nocancel(self, pid, args): |
| 1689 """Redirects to handle_open().""" |
| 1690 return self.handle_open(pid, args) |
| 1691 |
| 1692 def handle_open(self, pid, args): |
| 1693 if pid not in self._process_lookup: |
| 1694 # Another process tree, ignore. |
| 1695 return |
| 1696 match = self.RE_OPEN.match(args) |
| 1697 if not match: |
| 1698 raise TracingFailure( |
| 1699 'Failed to parse arguments: %s' % args, |
| 1700 None, None, None) |
| 1701 flag = int(match.group(2), 16) |
| 1702 if self.O_DIRECTORY & flag == self.O_DIRECTORY: |
| 1703 # Ignore directories. |
| 1704 return |
| 1705 self._handle_file(pid, match.group(1)) |
| 1706 |
| 1707 def handle_rename(self, pid, args): |
| 1708 if pid not in self._process_lookup: |
| 1709 # Another process tree, ignore. |
| 1710 return |
| 1711 match = self.RE_RENAME.match(args) |
| 1712 if not match: |
| 1713 raise TracingFailure( |
| 1714 'Failed to parse arguments: %s' % args, |
| 1715 None, None, None) |
| 1716 self._handle_file(pid, match.group(1)) |
| 1717 self._handle_file(pid, match.group(2)) |
| 1718 |
| 1719 def _handle_file(self, pid, filepath): |
| 1720 if not filepath.startswith('/'): |
| 1721 filepath = os.path.join(self._process_lookup[pid].cwd, filepath) |
| 1722 # We can get '..' in the path. |
| 1723 filepath = os.path.normpath(filepath) |
| 1724 # Sadly, still need to filter out directories here; |
| 1725 # saw open_nocancel(".", 0, 0) = 0 lines. |
| 1726 if os.path.isdir(filepath): |
| 1727 return |
| 1728 self._process_lookup[pid].add_file(filepath, False) |
| 1729 |
| 1730 def handle_ftruncate(self, pid, args): |
| 1731 """Just used as a signal to kill dtrace, ignoring.""" |
| 1732 pass |
| 1733 |
| 1734 @staticmethod |
| 1735 def _handle_ignored(pid, args): |
| 1736 """Is called for all the event traces that are not handled.""" |
| 1737 raise NotImplementedError('Please implement me') |
| 1738 |
| 1739 @staticmethod |
| 1740 def process_escaped_arguments(text): |
| 1741 """Extracts escaped arguments on a string and return the arguments as a |
| 1742 list. |
| 1743 |
| 1744 Implemented as an automaton. |
| 1745 |
| 1746 Example: |
| 1747 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the |
| 1748 function will return ['python2.7', '-c', 'print("hi")']
| 1749 """ |
| 1750 if not text.endswith('\\0'): |
| 1751 raise ValueError('String is not null terminated: %r' % text, text) |
| 1752 text = text[:-2] |
| 1753 |
| 1754 def unescape(x): |
| 1755 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?.""" |
| 1756 out = [] |
| 1757 escaped = False |
| 1758 for i in x: |
| 1759 if i == '\\' and not escaped: |
| 1760 escaped = True |
| 1761 continue |
| 1762 escaped = False |
| 1763 out.append(i) |
| 1764 return ''.join(out) |
| 1765 |
| 1766 return [unescape(i) for i in text.split('\\001')] |
| 1767 |
| 1768 class Tracer(ApiBase.Tracer): |
| 1769 # pylint: disable=C0301 |
| 1770 # |
| 1771 # To understand the following code, you'll want to take a look at: |
| 1772 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
| 1773 # https://wikis.oracle.com/display/DTrace/Variables |
| 1774 # http://docs.oracle.com/cd/E19205-01/820-4221/ |
| 1775 # |
| 1776 # 0. Dump all the valid probes into a text file. It is important, you |
| 1777 # want to redirect into a file and you don't want to constantly 'sudo'. |
| 1778 # $ sudo dtrace -l > probes.txt |
| 1779 # |
| 1780 # 1. Count the number of probes: |
| 1781 # $ wc -l probes.txt |
| 1782 # 81823 # On OSX 10.7, including 1 header line. |
| 1783 # |
| 1784 # 2. List providers, intentionally skipping all the 'syspolicy10925' and the |
| 1785 # likes and skipping the header with NR>1: |
| 1786 # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]' |
| 1787 # dtrace |
| 1788 # fbt |
| 1789 # io |
| 1790 # ip |
| 1791 # lockstat |
| 1792 # mach_trap |
| 1793 # proc |
| 1794 # profile |
| 1795 # sched |
| 1796 # syscall |
| 1797 # tcp |
| 1798 # vminfo |
| 1799 # |
| 1800 # 3. List of valid probes: |
| 1801 # $ grep syscall probes.txt | less |
| 1802 # or use dtrace directly: |
| 1803 # $ sudo dtrace -l -P syscall | less |
| 1804 # |
| 1805 # trackedpid is an associative array where its value can be 0, 1 or 2. |
| 1806 # 0 is for untracked processes and is the default value for items not |
| 1807 # in the associative array. |
| 1808 # 1 is for tracked processes. |
| 1809 # 2 is for the script created by create_thunk() only. It is not tracked |
| 1810 # itself but all its descendants are.
| 1811 # |
| 1812 # The script will kill itself only once waiting_to_die == 1 and
| 1813 # current_processes == 0, i.e. once the ftruncate() stop signal was
| 1814 # received and all traced processes exited.
| 1815 # |
| 1816 # TODO(maruel): Use cacheable predicates. See |
| 1817 # https://wikis.oracle.com/display/DTrace/Performance+Considerations |
| 1818 D_CODE = """ |
| 1819 dtrace:::BEGIN { |
| 1820 waiting_to_die = 0; |
| 1821 current_processes = 0; |
| 1822 logindex = 0; |
| 1823 printf("%d %d %s_%s(\\"%s\\")\\n", |
| 1824 logindex, PID, probeprov, probename, SCRIPT); |
| 1825 logindex++; |
| 1826 } |
| 1827 |
| 1828 proc:::start /trackedpid[ppid]/ { |
| 1829 trackedpid[pid] = 1; |
| 1830 current_processes += 1; |
| 1831 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", |
| 1832 logindex, pid, probeprov, probename, |
| 1833 ppid, |
| 1834 execname, |
| 1835 current_processes); |
| 1836 logindex++; |
| 1837 } |
| 1838 /* Should use SCRIPT but there is no access to this variable at that |
| 1839 * point. */ |
| 1840 proc:::start /ppid == PID && execname == "Python"/ { |
| 1841 trackedpid[pid] = 2; |
| 1842 current_processes += 1; |
| 1843 printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", |
| 1844 logindex, pid, probeprov, probename, |
| 1845 ppid, |
| 1846 execname, |
| 1847 current_processes); |
| 1848 logindex++; |
| 1849 } |
| 1850 proc:::exit /trackedpid[pid] && |
| 1851 current_processes == 1 && |
| 1852 waiting_to_die == 1/ { |
| 1853 trackedpid[pid] = 0; |
| 1854 current_processes -= 1; |
| 1855 printf("%d %d %s_%s(%d)\\n", |
| 1856 logindex, pid, probeprov, probename, |
| 1857 current_processes); |
| 1858 logindex++; |
| 1859 exit(0); |
| 1860 } |
| 1861 proc:::exit /trackedpid[pid]/ { |
| 1862 trackedpid[pid] = 0; |
| 1863 current_processes -= 1; |
| 1864 printf("%d %d %s_%s(%d)\\n", |
| 1865 logindex, pid, probeprov, probename, |
| 1866 current_processes); |
| 1867 logindex++; |
| 1868 } |
| 1869 |
| 1870 /* Use an arcane function to detect when we need to die */ |
| 1871 syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ { |
| 1872 waiting_to_die = 1; |
| 1873 printf("%d %d %s()\\n", logindex, pid, probefunc); |
| 1874 logindex++; |
| 1875 } |
| 1876 syscall::ftruncate:entry / |
| 1877 pid == PID && arg0 == FILE_ID && current_processes == 0/ { |
| 1878 exit(0); |
| 1879 } |
| 1880 |
| 1881 syscall::open*:entry /trackedpid[pid] == 1/ { |
| 1882 self->open_arg0 = arg0; |
| 1883 self->open_arg1 = arg1; |
| 1884 self->open_arg2 = arg2; |
| 1885 } |
| 1886 syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ { |
| 1887 this->open_arg0 = copyinstr(self->open_arg0); |
| 1888 printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n", |
| 1889 logindex, pid, probefunc, |
| 1890 this->open_arg0, |
| 1891 self->open_arg1, |
| 1892 self->open_arg2); |
| 1893 logindex++; |
| 1894 this->open_arg0 = 0; |
| 1895 } |
| 1896 syscall::open*:return /trackedpid[pid] == 1/ { |
| 1897 self->open_arg0 = 0; |
| 1898 self->open_arg1 = 0; |
| 1899 self->open_arg2 = 0; |
| 1900 } |
| 1901 |
| 1902 syscall::rename:entry /trackedpid[pid] == 1/ { |
| 1903 self->rename_arg0 = arg0; |
| 1904 self->rename_arg1 = arg1; |
| 1905 } |
| 1906 syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ { |
| 1907 this->rename_arg0 = copyinstr(self->rename_arg0); |
| 1908 this->rename_arg1 = copyinstr(self->rename_arg1); |
| 1909 printf("%d %d %s(\\"%s\\", \\"%s\\")\\n", |
| 1910 logindex, pid, probefunc, |
| 1911 this->rename_arg0, |
| 1912 this->rename_arg1); |
| 1913 logindex++; |
| 1914 this->rename_arg0 = 0; |
| 1915 this->rename_arg1 = 0; |
| 1916 } |
| 1917 syscall::rename:return /trackedpid[pid] == 1/ { |
| 1918 self->rename_arg0 = 0; |
| 1919 self->rename_arg1 = 0; |
| 1920 } |
| 1921 |
| 1922 /* Track chdir; it's painful because it only receives relative paths.
| 1923 */
| 1924 syscall::chdir:entry /trackedpid[pid] == 1/ { |
| 1925 self->chdir_arg0 = arg0; |
| 1926 } |
| 1927 syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ { |
| 1928 this->chdir_arg0 = copyinstr(self->chdir_arg0); |
| 1929 printf("%d %d %s(\\"%s\\")\\n", |
| 1930 logindex, pid, probefunc, |
| 1931 this->chdir_arg0); |
| 1932 logindex++; |
| 1933 this->chdir_arg0 = 0; |
| 1934 } |
| 1935 syscall::chdir:return /trackedpid[pid] == 1/ { |
| 1936 self->chdir_arg0 = 0; |
| 1937 } |
| 1938 """ |
| 1939 |
| 1940 # execve-specific code, tends to throw a lot of exceptions. |
| 1941 D_CODE_EXECVE = """ |
| 1942 /* Finally what we care about! */ |
| 1943 syscall::exec*:entry /trackedpid[pid]/ { |
| 1944 self->exec_arg0 = copyinstr(arg0); |
| 1945 /* Incrementally probe for a NULL in the argv parameter of execve() to |
| 1946 * figure out argc. */ |
| 1947 /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was |
| 1948 * found. */ |
| 1949 self->exec_argc = 0; |
| 1950 /* Probe for argc==1 */ |
| 1951 this->exec_argv = (user_addr_t*)copyin( |
| 1952 arg1, sizeof(user_addr_t) * (self->exec_argc + 1)); |
| 1953 self->exec_argc = this->exec_argv[self->exec_argc] ? |
| 1954 (self->exec_argc + 1) : self->exec_argc; |
| 1955 |
| 1956 /* Probe for argc==2 */ |
| 1957 this->exec_argv = (user_addr_t*)copyin( |
| 1958 arg1, sizeof(user_addr_t) * (self->exec_argc + 1)); |
| 1959 self->exec_argc = this->exec_argv[self->exec_argc] ? |
| 1960 (self->exec_argc + 1) : self->exec_argc; |
| 1961 |
| 1962 /* Probe for argc==3 */ |
| 1963 this->exec_argv = (user_addr_t*)copyin( |
| 1964 arg1, sizeof(user_addr_t) * (self->exec_argc + 1)); |
| 1965 self->exec_argc = this->exec_argv[self->exec_argc] ? |
| 1966 (self->exec_argc + 1) : self->exec_argc; |
| 1967 |
| 1968 /* Probe for argc==4 */ |
| 1969 this->exec_argv = (user_addr_t*)copyin( |
| 1970 arg1, sizeof(user_addr_t) * (self->exec_argc + 1)); |
| 1971 self->exec_argc = this->exec_argv[self->exec_argc] ? |
| 1972 (self->exec_argc + 1) : self->exec_argc; |
| 1973 |
| 1974 /* Copy the input strings since there is no guarantee they'll be
| 1975 * present after the call completes. */
| 1976 self->exec_argv0 = (self->exec_argc > 0) ? |
| 1977 copyinstr(this->exec_argv[0]) : ""; |
| 1978 self->exec_argv1 = (self->exec_argc > 1) ? |
| 1979 copyinstr(this->exec_argv[1]) : ""; |
| 1980 self->exec_argv2 = (self->exec_argc > 2) ? |
| 1981 copyinstr(this->exec_argv[2]) : ""; |
| 1982 self->exec_argv3 = (self->exec_argc > 3) ? |
| 1983 copyinstr(this->exec_argv[3]) : ""; |
| 1984 this->exec_argv = 0; |
| 1985 } |
| 1986 syscall::exec*:return /trackedpid[pid] && errno == 0/ { |
| 1987 /* We need to join strings here, as using multiple printf() would |
| 1988 * cause tearing when multiple threads/processes are traced. |
| 1989 * Since it is impossible to escape a string and join it to another one, |
| 1990 * like sprintf("%s%S", previous, more), use hackery. |
| 1991 * Each element is separated with a \\1. \\0 cannot be used because
| 1992 * it is simply ignored. This will conflict with any program putting a
| 1993 * \\1 in its execve() string but this should be "rare enough". */
| 1994 this->args = ""; |
| 1995 /* Process exec_argv[0] */ |
| 1996 this->args = strjoin( |
| 1997 this->args, (self->exec_argc > 0) ? self->exec_argv0 : ""); |
| 1998 |
| 1999 /* Process exec_argv[1] */ |
| 2000 this->args = strjoin( |
| 2001 this->args, (self->exec_argc > 1) ? "\\1" : ""); |
| 2002 this->args = strjoin( |
| 2003 this->args, (self->exec_argc > 1) ? self->exec_argv1 : ""); |
| 2004 |
| 2005 /* Process exec_argv[2] */ |
| 2006 this->args = strjoin( |
| 2007 this->args, (self->exec_argc > 2) ? "\\1" : ""); |
| 2008 this->args = strjoin( |
| 2009 this->args, (self->exec_argc > 2) ? self->exec_argv2 : ""); |
| 2010 |
| 2011 /* Process exec_argv[3] */ |
| 2012 this->args = strjoin( |
| 2013 this->args, (self->exec_argc > 3) ? "\\1" : ""); |
| 2014 this->args = strjoin( |
| 2015 this->args, (self->exec_argc > 3) ? self->exec_argv3 : ""); |
| 2016 |
| 2017 /* Prints self->exec_argc to permit verifying the internal
| 2018 * consistency since this code is quite fishy. */ |
| 2019 printf("%d %d %s(\\"%s\\", [%d, %S])\\n", |
| 2020 logindex, pid, probefunc, |
| 2021 self->exec_arg0, |
| 2022 self->exec_argc, |
| 2023 this->args); |
| 2024 logindex++; |
| 2025 this->args = 0; |
| 2026 } |
| 2027 syscall::exec*:return /trackedpid[pid]/ { |
| 2028 self->exec_arg0 = 0; |
| 2029 self->exec_argc = 0; |
| 2030 self->exec_argv0 = 0; |
| 2031 self->exec_argv1 = 0; |
| 2032 self->exec_argv2 = 0; |
| 2033 self->exec_argv3 = 0; |
| 2034 } |
| 2035 """ |
| 2036 |
| 2037 # Code currently not used. |
| 2038 D_EXTRANEOUS = """ |
| 2039 /* This is a good learning experience, since it traces a lot of things |
| 2040 * related to the process and child processes. |
| 2041 * Warning: it generates a gigantic log. For example, tracing |
| 2042 * "data/trace_inputs/child1.py --child" generates a 2mb log and takes |
| 2043 * several minutes to execute. |
| 2044 */ |
| 2045 /* |
| 2046 mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2047 printf("%d %d %s_%s() = %d\\n", |
| 2048 logindex, pid, probeprov, probefunc, errno); |
| 2049 logindex++; |
| 2050 } |
| 2051 proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2052 printf("%d %d %s_%s() = %d\\n", |
| 2053 logindex, pid, probeprov, probefunc, errno); |
| 2054 logindex++; |
| 2055 } |
| 2056 sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2057 printf("%d %d %s_%s() = %d\\n", |
| 2058 logindex, pid, probeprov, probefunc, errno); |
| 2059 logindex++; |
| 2060 } |
| 2061 syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2062 printf("%d %d %s_%s() = %d\\n", |
| 2063 logindex, pid, probeprov, probefunc, errno); |
| 2064 logindex++; |
| 2065 } |
| 2066 vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
| 2067 printf("%d %d %s_%s() = %d\\n", |
| 2068 logindex, pid, probeprov, probefunc, errno); |
| 2069 logindex++; |
| 2070 } |
| 2071 */ |
| 2072 /* TODO(maruel): *stat* functions and friends |
| 2073 syscall::access:return, |
| 2074 syscall::chdir:return, |
| 2075 syscall::chflags:return, |
| 2076 syscall::chown:return, |
| 2077 syscall::chroot:return, |
| 2078 syscall::getattrlist:return, |
| 2079 syscall::getxattr:return, |
| 2080 syscall::lchown:return, |
| 2081 syscall::lstat64:return, |
| 2082 syscall::lstat:return, |
| 2083 syscall::mkdir:return, |
| 2084 syscall::pathconf:return, |
| 2085 syscall::readlink:return, |
| 2086 syscall::removexattr:return, |
| 2087 syscall::setxattr:return, |
| 2088 syscall::stat64:return, |
| 2089 syscall::stat:return, |
| 2090 syscall::truncate:return, |
| 2091 syscall::unlink:return, |
| 2092 syscall::utimes:return, |
| 2093 */ |
| 2094 """ |
| 2095 |
| 2096 def __init__(self, logname): |
| 2097 """Starts the log collection with dtrace. |
| 2098 |
| 2099 Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so |
| 2100 this needs to wait for dtrace to be "warmed up". |
| 2101 """ |
| 2102 super(Dtrace.Tracer, self).__init__(logname) |
| 2103 self._script = create_thunk() |
| 2104 # This unique dummy temp file is used to signal the dtrace script that it |
| 2105 # should stop as soon as all the child processes are done. A bit hackish |
| 2106 # but it works well enough.
| 2107 self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp( |
| 2108 prefix='trace_signal_file') |
| 2109 |
| 2110 # Note: do not use the -p flag. It's useless if the initial process quits |
| 2111 # too fast, resulting in missing traces from the grand-children. The D |
| 2112 # code manages the dtrace lifetime itself. |
| 2113 trace_cmd = [ |
| 2114 'sudo', |
| 2115 'dtrace', |
| 2116 # Use a larger buffer if getting 'out of scratch space' errors. |
| 2117 # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables |
| 2118 '-b', '10m', |
| 2119 '-x', 'dynvarsize=10m', |
| 2120 #'-x', 'dtrace_global_maxsize=1m', |
| 2121 '-x', 'evaltime=exec', |
| 2122 '-o', '/dev/stderr', |
| 2123 '-q', |
| 2124 '-n', self._get_dtrace_code(), |
| 2125 ] |
| 2126 with open(self._logname + '.log', 'wb') as logfile: |
| 2127 self._dtrace = subprocess.Popen( |
| 2128 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) |
| 2129 logging.debug('Started dtrace pid: %d' % self._dtrace.pid) |
| 2130 |
| 2131 # Reads until one line is printed, which signifies dtrace is up and ready. |
| 2132 with open(self._logname + '.log', 'rb') as logfile: |
| 2133 while 'dtrace_BEGIN' not in logfile.readline(): |
| 2134 if self._dtrace.poll() is not None: |
| 2135 # Do a busy wait. :/ |
| 2136 break |
| 2137 logging.debug('dtrace started') |
| 2138 |
| 2139 def _get_dtrace_code(self): |
| 2140 """Setups the D code to implement child process tracking. |
| 2141 |
| 2142 Injects the cookie in the script so it knows when to stop. |
| 2143 |
| 2144 The script will detect any instance of the script created with |
| 2145 create_thunk() and will start tracing it. |
| 2146 """ |
| 2147 return ( |
| 2148 'inline int PID = %d;\n' |
| 2149 'inline string SCRIPT = "%s";\n' |
| 2150 'inline int FILE_ID = %d;\n' |
| 2151 '\n' |
| 2152 '%s') % ( |
| 2153 os.getpid(), |
| 2154 self._script, |
| 2155 self._dummy_file_id, |
| 2156 self.D_CODE) + self.D_CODE_EXECVE |
| 2157 |
| 2158 def trace(self, cmd, cwd, tracename, output): |
| 2159 """Runs dtrace on an executable. |
| 2160 |
| 2161 Since dtruss is broken both when it starts the process itself and when
| 2162 tracing child processes, this code starts a wrapper process generated
| 2163 with create_thunk(), which starts the executable to trace.
| 2164 """ |
| 2165 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
| 2166 assert os.path.isabs(cmd[0]), cmd[0] |
| 2167 assert os.path.isabs(cwd), cwd |
| 2168 assert os.path.normpath(cwd) == cwd, cwd |
| 2169 with self._lock: |
| 2170 if not self._initialized: |
| 2171 raise TracingFailure( |
| 2172 'Called Tracer.trace() on an uninitialized object',
| 2173 None, None, None, tracename) |
| 2174 assert tracename not in (i['trace'] for i in self._traces) |
| 2175 |
| 2176 # Starts the script wrapper to start the child process. This signals the |
| 2177 # dtrace script that this process is to be traced. |
| 2178 stdout = stderr = None |
| 2179 if output: |
| 2180 stdout = subprocess.PIPE |
| 2181 stderr = subprocess.STDOUT |
| 2182 child_cmd = [ |
| 2183 sys.executable, |
| 2184 self._script, |
| 2185 tracename, |
| 2186 ] |
| 2187 # Call a dummy function so that dtrace knows I'm about to launch a process |
| 2188 # that needs to be traced. |
| 2189 # Yummy. |
| 2190 child = subprocess.Popen( |
| 2191 child_cmd + fix_python_path(cmd), |
| 2192 stdin=subprocess.PIPE, |
| 2193 stdout=stdout, |
| 2194 stderr=stderr, |
| 2195 cwd=cwd) |
| 2196 logging.debug('Started child pid: %d' % child.pid) |
| 2197 |
| 2198 out = child.communicate()[0] |
| 2199 # This doesn't mean tracing is done; one of the grand-child processes may
| 2200 # still be alive. They will be tracked by the dtrace script.
| 2201 |
| 2202 with self._lock: |
| 2203 assert tracename not in (i['trace'] for i in self._traces) |
| 2204 self._traces.append( |
| 2205 { |
| 2206 'cmd': cmd, |
| 2207 'cwd': cwd, |
| 2208 # The pid of the wrapper script, not very useful.
| 2209 'pid': child.pid, |
| 2210 'trace': tracename, |
| 2211 'output': out, |
| 2212 }) |
| 2213 return child.returncode, out |
| 2214 |
| 2215 def close(self, timeout=None): |
| 2216 """Terminates dtrace.""" |
| 2217 logging.debug('close(%s)' % timeout) |
| 2218 try: |
| 2219 try: |
| 2220 super(Dtrace.Tracer, self).close(timeout) |
| 2221 # Signal dtrace that it should stop now. |
| 2222 os.ftruncate(self._dummy_file_id, 0) |
| 2223 if timeout: |
| 2224 start = time.time() |
| 2225 # Use polling. :/ |
| 2226 while (self._dtrace.poll() is None and |
| 2227 (time.time() - start) < timeout): |
| 2228 time.sleep(0.1) |
| 2229 self._dtrace.kill() |
| 2230 self._dtrace.wait() |
| 2231 finally: |
| 2232 # Make sure to kill it in any case. |
| 2233 if self._dtrace.poll() is None: |
| 2234 try: |
| 2235 self._dtrace.kill() |
| 2236 self._dtrace.wait() |
| 2237 except OSError: |
| 2238 pass |
| 2239 |
| 2240 if self._dtrace.returncode != 0: |
| 2241 # Warn about any dtrace failure but basically ignore it. |
| 2242 print 'dtrace failure: %s' % self._dtrace.returncode |
| 2243 finally: |
| 2244 os.close(self._dummy_file_id) |
| 2245 os.remove(self._dummy_file_name) |
| 2246 os.remove(self._script) |
| 2247 |
| 2248 def post_process_log(self): |
| 2249 """Sorts the log back in order when each call occured. |
| 2250 |
| 2251 dtrace doesn't save the buffer in strict order since it keeps one buffer |
| 2252 per CPU. |
| 2253 """ |
| 2254 super(Dtrace.Tracer, self).post_process_log() |
| 2255 logname = self._logname + '.log' |
| 2256 with open(logname, 'rb') as logfile: |
| 2257 lines = [l for l in logfile if l.strip()] |
| 2258 errors = [l for l in lines if l.startswith('dtrace:')] |
| 2259 if errors: |
| 2260 raise TracingFailure( |
| 2261 'Found errors in the trace: %s' % '\n'.join(errors), |
| 2262 None, None, None, logname) |
| 2263 try: |
| 2264 lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0])) |
| 2265 except ValueError: |
| 2266 raise TracingFailure( |
| 2267 'Found errors in the trace: %s' % '\n'.join( |
| 2268 l for l in lines if l.split(' ', 1)[0].isdigit()), |
| 2269 None, None, None, logname) |
| 2270 with open(logname, 'wb') as logfile: |
| 2271 logfile.write(''.join(lines)) |
| 2272 |
| 2273 @staticmethod |
| 2274 def clean_trace(logname): |
| 2275 for ext in ('', '.log'): |
| 2276 if os.path.isfile(logname + ext): |
| 2277 os.remove(logname + ext) |
| 2278 |
| 2279 @classmethod |
| 2280 def parse_log(cls, logname, blacklist): |
| 2281 logging.info('parse_log(%s, ...)' % logname) |
| 2282 |
| 2283 def blacklist_more(filepath): |
| 2284 # All the HFS metadata is in the form /.vol/... |
| 2285 return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath) |
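| # e.g. an HFS metadata path like '/.vol/12345/678' (illustrative) gets
| # filtered out here.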
| 2286 |
| 2287 data = read_json(logname) |
| 2288 out = [] |
| 2289 for item in data['traces']: |
| 2290 context = cls.Context(blacklist_more, item['pid'], item['cwd']) |
| 2291 for line in open(logname + '.log', 'rb'): |
| 2292 context.on_line(line) |
| 2293 out.append( |
| 2294 { |
| 2295 'results': context.to_results(), |
| 2296 'trace': item['trace'], |
| 2297 'output': item['output'], |
| 2298 }) |
| 2299 return out |
| 2300 |
| 2301 |
| 2302 class LogmanTrace(ApiBase): |
| 2303 """Uses the native Windows ETW based tracing functionality to trace a child |
| 2304 process. |
| 2305 |
| 2306 Caveat: this implementation doesn't track cwd or initial_cwd. This is because
| 2307 the Windows Kernel doesn't have a concept of 'current working directory' at
| 2308 all. A Win32 process has a map of current directories, one per drive letter,
| 2309 managed by the user mode kernel32.dll. In the kernel, a file is always
| 2310 opened relative to another file_object or as an absolute path. All the current |
| 2311 working directory logic is done in user mode. |
| 2312 """ |
| 2313 class Context(ApiBase.Context): |
| 2314 """Processes a ETW log line and keeps the list of existent and non |
| 2315 existent files accessed. |
| 2316 |
| 2317 Ignores directories. |
| 2318 """ |
| 2319 # These indexes are for the stripped version in json. |
| 2320 EVENT_NAME = 0 |
| 2321 TYPE = 1 |
| 2322 PID = 2 |
| 2323 TID = 3 |
| 2324 PROCESSOR_ID = 4 |
| 2325 TIMESTAMP = 5 |
| 2326 USER_DATA = 6 |
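| # i.e. the columns kept by Tracer.post_process_log(), which strips the raw
| # CSV down to [EVENT_NAME, TYPE, PID, TID, PROCESSOR_ID, TIMESTAMP] followed
| # by the event-specific 'User Data' columns.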
| 2327 |
| 2328 class Process(ApiBase.Context.Process): |
| 2329 def __init__(self, *args): |
| 2330 super(LogmanTrace.Context.Process, self).__init__(*args) |
| 2331 # Handle file objects that succeeded. |
| 2332 self.file_objects = {} |
| 2333 |
| 2334 def __init__(self, blacklist, tracer_pid): |
| 2335 logging.info('%s(%d)' % (self.__class__.__name__, tracer_pid)) |
| 2336 super(LogmanTrace.Context, self).__init__(blacklist) |
| 2337 self._drive_map = DosDriveMap() |
| 2338 # Threads mapping to the corresponding process id. |
| 2339 self._threads_active = {} |
| 2340 # Process ID of the tracer, e.g. trace_inputs.py
| 2341 self._tracer_pid = tracer_pid |
| 2342 self._line_number = 0 |
| 2343 |
| 2344 def on_line(self, line): |
| 2345 """Processes a json Event line.""" |
| 2346 self._line_number += 1 |
| 2347 try: |
| 2348 # By Opcode |
| 2349 handler = getattr( |
| 2350 self, |
| 2351 'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]), |
| 2352 None) |
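| # e.g. an event with name 'FileIo' and type 'Create' dispatches to
| # handle_FileIo_Create().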
| 2353 if not handler: |
| 2354 raise TracingFailure( |
| 2355 'Unexpected event %s_%s' % ( |
| 2356 line[self.EVENT_NAME], line[self.TYPE]), |
| 2357 None, None, None) |
| 2358 handler(line) |
| 2359 except TracingFailure, e: |
| 2360 # Hack in the values since the handler could be a static function. |
| 2361 e.pid = line[self.PID] |
| 2362 e.line = line |
| 2363 e.line_number = self._line_number |
| 2364 # Re-raise the modified exception. |
| 2365 raise |
| 2366 except (KeyError, NotImplementedError, ValueError), e: |
| 2367 raise TracingFailure( |
| 2368 'Trace generated a %s exception: %s' % ( |
| 2369 e.__class__.__name__, str(e)), |
| 2370 line[self.PID], |
| 2371 self._line_number, |
| 2372 line, |
| 2373 e) |
| 2374 |
| 2375 def to_results(self): |
| 2376 if not self.root_process: |
| 2377 raise TracingFailure( |
| 2378 'Failed to detect the initial process', |
| 2379 None, None, None) |
| 2380 process = self.root_process.to_results_process() |
| 2381 return Results(process) |
| 2382 |
| 2383 def _thread_to_process(self, tid): |
| 2384 """Finds the process from the thread id.""" |
| 2385 tid = int(tid, 16) |
| 2386 pid = self._threads_active.get(tid) |
| 2387 if not pid or not self._process_lookup.get(pid): |
| 2388 return |
| 2389 return self._process_lookup[pid] |
| 2390 |
| 2391 @classmethod |
| 2392 def handle_EventTrace_Header(cls, line): |
| 2393 """Verifies no event was dropped, e.g. no buffer overrun occured.""" |
| 2394 BUFFER_SIZE = cls.USER_DATA |
| 2395 #VERSION = cls.USER_DATA + 1 |
| 2396 #PROVIDER_VERSION = cls.USER_DATA + 2 |
| 2397 #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3 |
| 2398 #END_TIME = cls.USER_DATA + 4 |
| 2399 #TIMER_RESOLUTION = cls.USER_DATA + 5 |
| 2400 #MAX_FILE_SIZE = cls.USER_DATA + 6 |
| 2401 #LOG_FILE_MODE = cls.USER_DATA + 7 |
| 2402 #BUFFERS_WRITTEN = cls.USER_DATA + 8 |
| 2403 #START_BUFFERS = cls.USER_DATA + 9 |
| 2404 #POINTER_SIZE = cls.USER_DATA + 10 |
| 2405 EVENTS_LOST = cls.USER_DATA + 11 |
| 2406 #CPU_SPEED = cls.USER_DATA + 12 |
| 2407 #LOGGER_NAME = cls.USER_DATA + 13 |
| 2408 #LOG_FILE_NAME = cls.USER_DATA + 14 |
| 2409 #BOOT_TIME = cls.USER_DATA + 15 |
| 2410 #PERF_FREQ = cls.USER_DATA + 16 |
| 2411 #START_TIME = cls.USER_DATA + 17 |
| 2412 #RESERVED_FLAGS = cls.USER_DATA + 18 |
| 2413 #BUFFERS_LOST = cls.USER_DATA + 19 |
| 2414 #SESSION_NAME_STRING = cls.USER_DATA + 20 |
| 2415 #LOG_FILE_NAME_STRING = cls.USER_DATA + 21 |
| 2416 if line[EVENTS_LOST] != '0': |
| 2417 raise TracingFailure( |
| 2418 ( '%s events were lost during trace, please increase the buffer ' |
| 2419 'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]), |
| 2420 None, None, None) |
| 2421 |
| 2422 def handle_FileIo_Cleanup(self, line): |
| 2423 """General wisdom: if a file is closed, it's because it was opened. |
| 2424 |
| 2425 Note that FileIo_Close is not used since if a file was opened properly but |
| 2426 not closed before the process exits, only Cleanup will be logged. |
| 2427 """ |
| 2428 #IRP = self.USER_DATA |
| 2429 TTID = self.USER_DATA + 1 # Thread ID, that's what we want. |
| 2430 FILE_OBJECT = self.USER_DATA + 2 |
| 2431 #FILE_KEY = self.USER_DATA + 3 |
| 2432 proc = self._thread_to_process(line[TTID]) |
| 2433 if not proc: |
| 2434 # Not a process we care about. |
| 2435 return |
| 2436 file_object = line[FILE_OBJECT] |
| 2437 if file_object in proc.file_objects: |
| 2438 proc.add_file(proc.file_objects.pop(file_object), False) |
| 2439 |
| 2440 def handle_FileIo_Create(self, line): |
| 2441 """Handles a file open. |
| 2442 |
| 2443 All FileIo events are described at |
| 2444 http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx |
| 2445 for some value of 'description'. |
| 2446 |
| 2447 " (..) process and thread id values of the IO events (..) are not valid " |
| 2448 http://msdn.microsoft.com/magazine/ee358703.aspx |
| 2449 |
| 2450 The FileIo.Create event doesn't return if the CreateFile() call |
| 2451 succeeded, so keep track of the file_object and check that it is |
| 2452 eventually closed with FileIo_Cleanup. |
| 2453 """ |
| 2454 #IRP = self.USER_DATA |
| 2455 TTID = self.USER_DATA + 1 # Thread ID, that's what we want. |
| 2456 FILE_OBJECT = self.USER_DATA + 2 |
| 2457 #CREATE_OPTIONS = self.USER_DATA + 3 |
| 2458 #FILE_ATTRIBUTES = self.USER_DATA + 4 |
| 2459 #SHARE_ACCESS = self.USER_DATA + 5
| 2460 OPEN_PATH = self.USER_DATA + 6 |
| 2461 |
| 2462 proc = self._thread_to_process(line[TTID]) |
| 2463 if not proc: |
| 2464 # Not a process we care about. |
| 2465 return |
| 2466 |
| 2467 match = re.match(r'^\"(.+)\"$', line[OPEN_PATH]) |
| 2468 raw_path = match.group(1) |
| 2469 # Ignore directories and bare drive right away. |
| 2470 if raw_path.endswith(os.path.sep): |
| 2471 return |
| 2472 filepath = self._drive_map.to_win32(raw_path) |
| 2473 # Ignore bare drive right away. Some may still fall through with format |
| 2474 # like '\\?\X:' |
| 2475 if len(filepath) == 2: |
| 2476 return |
| 2477 file_object = line[FILE_OBJECT] |
| 2478 if os.path.isdir(filepath): |
| 2479 # There is no O_DIRECTORY equivalent on Windows. The closest is
| 2480 # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
| 2481 # simply discard directories as they are found.
| 2482 return |
| 2483 # Override any stale file object |
| 2484 proc.file_objects[file_object] = filepath |
| 2485 |
| 2486 def handle_FileIo_Rename(self, line): |
| 2487 # TODO(maruel): Handle? |
| 2488 pass |
| 2489 |
| 2490 def handle_Process_End(self, line): |
| 2491 pid = line[self.PID] |
| 2492 if self._process_lookup.get(pid): |
| 2493 logging.info('Terminated: %d' % pid) |
| 2494 self._process_lookup[pid] = None |
| 2495 else: |
| 2496 logging.debug('Terminated: %d' % pid) |
| 2497 |
| 2498 def handle_Process_Start(self, line): |
| 2499 """Handles a new child process started by PID.""" |
| 2500 #UNIQUE_PROCESS_KEY = self.USER_DATA |
| 2501 PROCESS_ID = self.USER_DATA + 1 |
| 2502 #PARENT_PID = self.USER_DATA + 2 |
| 2503 #SESSION_ID = self.USER_DATA + 3 |
| 2504 #EXIT_STATUS = self.USER_DATA + 4 |
| 2505 #DIRECTORY_TABLE_BASE = self.USER_DATA + 5 |
| 2506 #USER_SID = self.USER_DATA + 6 |
| 2507 IMAGE_FILE_NAME = self.USER_DATA + 7 |
| 2508 COMMAND_LINE = self.USER_DATA + 8 |
| 2509 |
| 2510 ppid = line[self.PID] |
| 2511 pid = int(line[PROCESS_ID], 16) |
| 2512 logging.debug( |
| 2513 'New process %d->%d (%s) %s' % |
| 2514 (ppid, pid, line[IMAGE_FILE_NAME], line[COMMAND_LINE])) |
| 2515 |
| 2516 if ppid == self._tracer_pid: |
| 2517 # Need to ignore processes we don't know about because the log is |
| 2518 # system-wide. self._tracer_pid shall start only one process. |
| 2519 if self.root_process: |
| 2520 raise TracingFailure( |
| 2521 ( 'Parent process is _tracer_pid(%d) but root_process(%d) is ' |
| 2522 'already set') % (self._tracer_pid, self.root_process.pid), |
| 2523 None, None, None) |
| 2524 proc = self.Process(self.blacklist, pid, None) |
| 2525 self.root_process = proc |
| 2526 ppid = None |
| 2527 elif self._process_lookup.get(ppid): |
| 2528 proc = self.Process(self.blacklist, pid, None) |
| 2529 self._process_lookup[ppid].children.append(proc) |
| 2530 else: |
| 2531 # Ignore |
| 2532 return |
| 2533 self._process_lookup[pid] = proc |
| 2534 |
| 2535 if (not line[IMAGE_FILE_NAME].startswith('"') or |
| 2536 not line[IMAGE_FILE_NAME].endswith('"')): |
| 2537 raise TracingFailure( |
| 2538 'Image file name is not properly quoted: %s' % line[IMAGE_FILE_NAME],
| 2539 None, None, None) |
| 2540 |
| 2541 # TODO(maruel): Process escapes. |
| 2542 if (not line[COMMAND_LINE].startswith('"') or |
| 2543 not line[COMMAND_LINE].endswith('"')): |
| 2544 raise TracingFailure( |
| 2545 'Command line is not properly quoted: %s' % line[COMMAND_LINE], |
| 2546 None, None, None) |
| 2547 proc.command = CommandLineToArgvW(line[COMMAND_LINE][1:-1]) |
| 2548 proc.executable = line[IMAGE_FILE_NAME][1:-1] |
| 2549 # proc.command[0] may be the absolute path of 'executable' but it may be |
| 2550 # anything else too. If it happens that command[0] ends with executable, |
| 2551 # use it, otherwise it defaults to the base name.
| 2552 cmd0 = proc.command[0].lower() |
| 2553 if not cmd0.endswith('.exe'): |
| 2554 # TODO(maruel): That's not strictly true either. |
| 2555 cmd0 += '.exe' |
| 2556 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0): |
| 2557 # Fix the path. |
| 2558 cmd0 = cmd0.replace('/', os.path.sep) |
| 2559 cmd0 = os.path.normpath(cmd0) |
| 2560 proc.executable = get_native_path_case(cmd0) |
| 2561 logging.info( |
| 2562 'New child: %s -> %d %s' % (ppid, pid, proc.executable)) |
| 2563 |
| 2564 def handle_Thread_End(self, line): |
| 2565 """Has the same parameters as Thread_Start.""" |
| 2566 tid = int(line[self.TID], 16) |
| 2567 self._threads_active.pop(tid, None) |
| 2568 |
| 2569 def handle_Thread_Start(self, line): |
| 2570 """Handles a new thread created. |
| 2571 |
| 2572 Do not use self.PID here since a process' initial thread is created by |
| 2573 the parent process. |
| 2574 """ |
| 2575 PROCESS_ID = self.USER_DATA |
| 2576 TTHREAD_ID = self.USER_DATA + 1 |
| 2577 #STACK_BASE = self.USER_DATA + 2 |
| 2578 #STACK_LIMIT = self.USER_DATA + 3 |
| 2579 #USER_STACK_BASE = self.USER_DATA + 4 |
| 2580 #USER_STACK_LIMIT = self.USER_DATA + 5 |
| 2581 #AFFINITY = self.USER_DATA + 6 |
| 2582 #WIN32_START_ADDR = self.USER_DATA + 7 |
| 2583 #TEB_BASE = self.USER_DATA + 8 |
| 2584 #SUB_PROCESS_TAG = self.USER_DATA + 9 |
| 2585 #BASE_PRIORITY = self.USER_DATA + 10 |
| 2586 #PAGE_PRIORITY = self.USER_DATA + 11 |
| 2587 #IO_PRIORITY = self.USER_DATA + 12 |
| 2588 #THREAD_FLAGS = self.USER_DATA + 13 |
| 2589 # Do not use self.PID here since a process' initial thread is created by |
| 2590 # the parent process. |
| 2591 pid = int(line[PROCESS_ID], 16) |
| 2592 tid = int(line[TTHREAD_ID], 16) |
| 2593 logging.debug('New thread pid:%d, tid:%d' % (pid, tid)) |
| 2594 self._threads_active[tid] = pid |
| 2595 |
| 2596 @classmethod |
| 2597 def supported_events(cls): |
| 2598 """Returns all the procesed events.""" |
| 2599 out = [] |
| 2600 for member in dir(cls): |
| 2601 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member) |
| 2602 if match: |
| 2603 out.append(match.groups()) |
| 2604 return out |
| 2605 |
| 2606 class Tracer(ApiBase.Tracer): |
| 2607 # The basic headers. |
| 2608 EXPECTED_HEADER = [ |
| 2609 u'Event Name', |
| 2610 u'Type', |
| 2611 u'Event ID', |
| 2612 u'Version', |
| 2613 u'Channel', |
| 2614 u'Level', # 5 |
| 2615 u'Opcode', |
| 2616 u'Task', |
| 2617 u'Keyword', |
| 2618 u'PID', |
| 2619 u'TID', # 10 |
| 2620 u'Processor Number', |
| 2621 u'Instance ID', |
| 2622 u'Parent Instance ID', |
| 2623 u'Activity ID', |
| 2624 u'Related Activity ID', # 15 |
| 2625 u'Clock-Time', |
| 2626 u'Kernel(ms)', # Both have a resolution of ~15ms which makes them |
| 2627 u'User(ms)', # pretty much useless. |
| 2628 u'User Data', # Extra arguments that are event-specific. |
| 2629 ] |
| 2630 # Only the useful headers common to all entries are listed here. Any column
| 2631 # at 19 or higher is dependent on the specific event. |
| 2632 EVENT_NAME = 0 |
| 2633 TYPE = 1 |
| 2634 PID = 9 |
| 2635 TID = 10 |
| 2636 PROCESSOR_ID = 11 |
| 2637 TIMESTAMP = 16 |
| 2638 NULL_GUID = '{00000000-0000-0000-0000-000000000000}' |
| 2639 USER_DATA = 19 |
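| # For example, line[self.PID] maps to the u'PID' column (index 9) in
| # EXPECTED_HEADER above, and columns 19+ hold the event-specific user data.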
| 2640 |
| 2641 def __init__(self, logname): |
| 2642 """Starts the log collection. |
| 2643 |
| 2644 Requires administrative access. logman.exe is synchronous so no need for a |
| 2645 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID |
| 2646 instead. The GUID constant name is SystemTraceControlGuid. Lovely. |
| 2647 |
| 2648 One can get the list of potentially interesting providers with: |
| 2649 "logman query providers | findstr /i file" |
| 2650 """ |
| 2651 super(LogmanTrace.Tracer, self).__init__(logname) |
| 2652 self._script = create_thunk() |
| 2653 cmd_start = [ |
| 2654 'logman.exe', |
| 2655 'start', |
| 2656 'NT Kernel Logger', |
| 2657 '-p', '{9e814aad-3204-11d2-9a82-006008a86939}', |
| 2658 # splitio,fileiocompletion,syscall,file,cswitch,img |
| 2659 '(process,fileio,thread)', |
| 2660 '-o', self._logname + '.etl', |
| 2661 '-ets', # Send directly to kernel |
| 2662 # Values extracted out of thin air. |
| 2663 # Event Trace Session buffer size in kb. |
| 2664 '-bs', '10240', |
| 2665 # Number of Event Trace Session buffers. |
| 2666 '-nb', '16', '256', |
| 2667 ] |
| 2668 logging.debug('Running: %s' % cmd_start) |
| 2669 try: |
| 2670 subprocess.check_call( |
| 2671 cmd_start, |
| 2672 stdin=subprocess.PIPE, |
| 2673 stdout=subprocess.PIPE, |
| 2674 stderr=subprocess.STDOUT) |
| 2675 except subprocess.CalledProcessError, e: |
| 2676 if e.returncode == -2147024891: |
| 2677 print >> sys.stderr, 'Please restart with an elevated admin prompt' |
| 2678 elif e.returncode == -2144337737: |
| 2679 print >> sys.stderr, ( |
| 2680 'A kernel trace was already running, stop it and try again') |
| 2681 raise |
| 2682 |
| 2683 def trace(self, cmd, cwd, tracename, output): |
| 2684 logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output)) |
| 2685 assert os.path.isabs(cmd[0]), cmd[0] |
| 2686 assert os.path.isabs(cwd), cwd |
| 2687 assert os.path.normpath(cwd) == cwd, cwd |
| 2688 with self._lock: |
| 2689 if not self._initialized: |
| 2690 raise TracingFailure( |
| 2691 'Called Tracer.trace() on an uninitialized object',
| 2692 None, None, None, tracename) |
| 2693 assert tracename not in (i['trace'] for i in self._traces) |
| 2694 |
| 2695 # Use "logman -?" for help. |
| 2696 |
| 2697 stdout = stderr = None |
| 2698 if output: |
| 2699 stdout = subprocess.PIPE |
| 2700 stderr = subprocess.STDOUT |
| 2701 |
| 2702 # Run the child process. |
| 2703 logging.debug('Running: %s' % cmd) |
| 2704 # Use the temporary script generated with create_thunk() so we have a |
| 2705 # clear pid owner. Since trace_inputs.py can be used as a library and |
| 2706 # could trace multiple processes simultaneously, it makes it more complex |
| 2707 # if the executable to be traced is executed directly here. It also solves |
| 2708 # issues related to logman.exe that needs to be executed to control the |
| 2709 # kernel trace. |
| 2710 child_cmd = [ |
| 2711 sys.executable, |
| 2712 self._script, |
| 2713 tracename, |
| 2714 ] |
| 2715 child = subprocess.Popen( |
| 2716 child_cmd + fix_python_path(cmd), |
| 2717 cwd=cwd, |
| 2718 stdin=subprocess.PIPE, |
| 2719 stdout=stdout, |
| 2720 stderr=stderr) |
| 2721 logging.debug('Started child pid: %d' % child.pid) |
| 2722 out = child.communicate()[0] |
| 2723 # This doesn't mean all the grand-children are done. Sadly, we don't have |
| 2724 # a good way to determine that. |
| 2725 |
| 2726 with self._lock: |
| 2727 assert tracename not in (i['trace'] for i in self._traces) |
| 2728 self._traces.append({ |
| 2729 'command': cmd, |
| 2730 'cwd': cwd, |
| 2731 'pid': child.pid, |
| 2732 'trace': tracename, |
| 2733 'output': out, |
| 2734 }) |
| 2735 |
| 2736 return child.returncode, out |
| 2737 |
| 2738 def close(self, _timeout=None): |
| 2739 """Stops the kernel log collection and converts the traces to text |
| 2740 representation. |
| 2741 """ |
| 2742 with self._lock: |
| 2743 if not self._initialized: |
| 2744 raise TracingFailure( |
| 2745 'Called Tracer.close() on an uninitialized object',
| 2746 None, None, None) |
| 2747 os.remove(self._script) |
| 2748 # Save metadata and add the 'format' key.
| 2749 data = { |
| 2750 'format': 'csv', |
| 2751 'traces': self._traces, |
| 2752 } |
| 2753 write_json(self._logname, data, False) |
| 2754 |
| 2755 cmd_stop = [ |
| 2756 'logman.exe', |
| 2757 'stop', |
| 2758 'NT Kernel Logger', |
| 2759 '-ets', # Sends the command directly to the kernel. |
| 2760 ] |
| 2761 logging.debug('Running: %s' % cmd_stop) |
| 2762 subprocess.check_call( |
| 2763 cmd_stop, |
| 2764 stdin=subprocess.PIPE, |
| 2765 stdout=subprocess.PIPE, |
| 2766 stderr=subprocess.STDOUT) |
| 2767 self._initialized = False |
| 2768 |
| 2769 def post_process_log(self): |
| 2770 """Converts the .etl file into .csv then into .json.""" |
| 2771 super(LogmanTrace.Tracer, self).post_process_log() |
| 2772 logformat = 'csv' |
| 2773 self._convert_log(logformat) |
| 2774 |
| 2775 if logformat == 'csv_utf16': |
| 2776 def load_file(): |
| 2777 def utf_8_encoder(unicode_csv_data): |
| 2778 """Encodes the unicode object as utf-8 encoded str instance""" |
| 2779 for line in unicode_csv_data: |
| 2780 yield line.encode('utf-8') |
| 2781 |
| 2782 def unicode_csv_reader(unicode_csv_data, **kwargs): |
| 2783 """Encodes temporarily as UTF-8 since csv module doesn't do unicode. |
| 2784 """ |
| 2785 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs) |
| 2786 for row in csv_reader: |
| 2787 # Decode str utf-8 instances back to unicode instances, cell by |
| 2788 # cell: |
| 2789 yield [cell.decode('utf-8') for cell in row] |
| 2790 |
| 2791 # The CSV file is UTF-16 so use codecs.open() to load the file into |
| 2792 # the python internal unicode format (utf-8). Then explicitly |
| 2793 # re-encode as utf8 as str instances so csv can parse it fine. Then |
| 2794 # decode the utf-8 str back into python unicode instances. This |
| 2795 # sounds about right. |
| 2796 for line in unicode_csv_reader( |
| 2797 codecs.open(self._logname + '.' + logformat, 'r', 'utf-16')): |
| 2798 # line is a list of unicode objects |
| 2799 # So much white space! |
| 2800 yield [i.strip() for i in line] |
| 2801 |
| 2802 elif logformat == 'csv': |
| 2803 def load_file(): |
| 2804 def ansi_csv_reader(ansi_csv_data, **kwargs): |
| 2805 """Loads an 'ANSI' code page and returns unicode() objects.""" |
| 2806 assert sys.getfilesystemencoding() == 'mbcs' |
| 2807 encoding = get_current_encoding() |
| 2808 for row in csv.reader(ansi_csv_data, **kwargs): |
| 2809 # Decode str 'ansi' instances to unicode instances, cell by cell: |
| 2810 yield [cell.decode(encoding) for cell in row] |
| 2811 |
| 2812 # The fastest and smallest format but only supports 'ANSI' file paths. |
| 2813 # E.g. the filenames are encoded in the 'current' encoding.
| 2814 for line in ansi_csv_reader(open(self._logname + '.' + logformat)): |
| 2815 # line is a list of unicode objects. |
| 2816 yield [i.strip() for i in line] |
| 2817 |
| 2818 supported_events = LogmanTrace.Context.supported_events() |
| 2819 |
| 2820 def trim(generator): |
| 2821 for index, line in enumerate(generator): |
| 2822 if not index: |
| 2823 if line != self.EXPECTED_HEADER: |
| 2824 raise TracingFailure( |
| 2825 'Found malformed header: %s' % ' '.join(line), |
| 2826 None, None, None) |
| 2827 continue |
| 2828 # As you can see, the CSV is full of useful non-redundant information: |
| 2829 if (line[2] != '0' or # Event ID |
| 2830 line[3] not in ('2', '3') or # Version |
| 2831 line[4] != '0' or # Channel |
| 2832 line[5] != '0' or # Level |
| 2833 line[7] != '0' or # Task |
| 2834 line[8] != '0x0000000000000000' or # Keyword |
| 2835 line[12] != '' or # Instance ID |
| 2836 line[13] != '' or # Parent Instance ID |
| 2837 line[14] != self.NULL_GUID or # Activity ID |
| 2838 line[15] != ''): # Related Activity ID |
| 2839 raise TracingFailure( |
| 2840 'Found unexpected values in line: %s' % ' '.join(line), |
| 2841 None, None, None) |
| 2842 |
| 2843 if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events: |
| 2844 continue |
| 2845 |
| 2846 # Convert the PID in-place from hex. |
| 2847 line[self.PID] = int(line[self.PID], 16) |
| 2848 |
| 2849 yield [ |
| 2850 line[self.EVENT_NAME], |
| 2851 line[self.TYPE], |
| 2852 line[self.PID], |
| 2853 line[self.TID], |
| 2854 line[self.PROCESSOR_ID], |
| 2855 line[self.TIMESTAMP], |
| 2856 ] + line[self.USER_DATA:] |
| 2857 |
| 2858 write_json('%s.json' % self._logname, list(trim(load_file())), True) |
| 2859 |
| 2860 def _convert_log(self, logformat): |
| 2861 """Converts the ETL trace to text representation. |
| 2862 |
| 2863 Normally, 'csv' is sufficient. If complex scripts are used (like eastern |
| 2864 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'. |
| 2865 |
| 2866 Arguments: |
| 2867 - logformat: Text format to be generated, csv, csv_utf16 or xml. |
| 2868 |
| 2869 Use "tracerpt -?" for help. |
| 2870 """ |
| 2871 LOCALE_INVARIANT = 0x7F |
| 2872 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT) |
| 2873 cmd_convert = [ |
| 2874 'tracerpt.exe', |
| 2875 '-l', self._logname + '.etl', |
| 2876 '-o', self._logname + '.' + logformat, |
| 2877 '-gmt', # Use UTC |
| 2878 '-y', # No prompt |
| 2879 # Use -of XML to get the header of each items after column 19, e.g. all |
| 2880 # the actual headers of 'User Data'. |
| 2881 ] |
| 2882 |
| 2883 if logformat == 'csv': |
| 2884 # tracerpt localizes the 'Type' column, for major brainfuck |
| 2885 # entertainment. I can't imagine any sane reason to do that. |
| 2886 cmd_convert.extend(['-of', 'CSV']) |
| 2887 elif logformat == 'csv_utf16': |
| 2888 # This causes it to use UTF-16, which doubles the log size but ensures |
| 2889 # the log is readable for non-ASCII characters. |
| 2890 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode']) |
| 2891 elif logformat == 'xml': |
| 2892 cmd_convert.extend(['-of', 'XML']) |
| 2893 else: |
| 2894 raise ValueError('Unexpected log format \'%s\'' % logformat) |
| 2895 logging.debug('Running: %s' % cmd_convert) |
| 2896 # This can take tens of minutes for large logs.
| 2897 # Redirects all output to stderr. |
| 2898 subprocess.check_call( |
| 2899 cmd_convert, |
| 2900 stdin=subprocess.PIPE, |
| 2901 stdout=sys.stderr, |
| 2902 stderr=sys.stderr) |
| 2903 |
| 2904 @staticmethod |
| 2905 def clean_trace(logname): |
| 2906 for ext in ('', '.csv', '.etl', '.json', '.xml'): |
| 2907 if os.path.isfile(logname + ext): |
| 2908 os.remove(logname + ext) |
| 2909 |
| 2910 @classmethod |
| 2911 def parse_log(cls, logname, blacklist): |
| 2912 logging.info('parse_log(%s, %s)' % (logname, blacklist)) |
| 2913 |
| 2914 def blacklist_more(filepath): |
| 2915 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
| 2916 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) |
| 2917 |
| 2918 data = read_json(logname) |
| 2919 lines = read_json(logname + '.json') |
| 2920 out = [] |
| 2921 for item in data['traces']: |
| 2922 context = cls.Context(blacklist_more, item['pid']) |
| 2923 for line in lines: |
| 2924 context.on_line(line) |
| 2925 out.append( |
| 2926 { |
| 2927 'results': context.to_results(), |
| 2928 'trace': item['trace'], |
| 2929 'output': item['output'], |
| 2930 }) |
| 2931 return out |
| 2932 |
| 2933 |
| 2934 def get_api(): |
| 2935 """Returns the correct implementation for the current OS.""" |
| 2936 if sys.platform == 'cygwin': |
| 2937 raise NotImplementedError( |
| 2938 'Not implemented for cygwin, start the script from Win32 python') |
| 2939 flavors = { |
| 2940 'win32': LogmanTrace, |
| 2941 'darwin': Dtrace, |
| 2942 'sunos5': Dtrace, |
| 2943 'freebsd7': Dtrace, |
| 2944 'freebsd8': Dtrace, |
| 2945 } |
| 2946 # Defaults to strace. |
| 2947 return flavors.get(sys.platform, Strace)() |
| 2948 |
| 2949 |
| 2950 def extract_directories(root_dir, files, blacklist): |
| 2951 """Detects if all the files in a directory are in |files| and if so, replace |
| 2952 the individual files by a Results.Directory instance. |
| 2953 |
| 2954 Takes a list of Results.File instances and returns a shorter list of |
| 2955 Results.File and Results.Directory instances. |
| 2956 |
| 2957 Arguments: |
| 2958 - root_dir: Optional base directory that shouldn't be searched further.
| 2959 - files: list of Results.File instances. |
| 2960 - blacklist: regexp of files to ignore, for example r'.+\.pyc'. |
| 2961 """ |
| 2962 logging.info( |
| 2963 'extract_directories(%s, %d files, ...)' % (root_dir, len(files))) |
| 2964 assert not (root_dir or '').endswith(os.path.sep), root_dir |
| 2965 assert not root_dir or (get_native_path_case(root_dir) == root_dir) |
| 2966 assert not any(isinstance(f, Results.Directory) for f in files) |
| 2967 # Remove non existent files. |
| 2968 files = [f for f in files if f.existent] |
| 2969 if not files: |
| 2970 return files |
| 2971 # All files must share the same root, which can be None. |
| 2972 assert len(set(f.root for f in files)) == 1, set(f.root for f in files) |
| 2973 |
| 2974 # Creates a {directory: {filename: File}} mapping, up to root. |
| 2975 buckets = {} |
| 2976 if root_dir: |
| 2977 buckets[root_dir] = {} |
| 2978 for fileobj in files: |
| 2979 path = fileobj.full_path |
| 2980 directory = os.path.dirname(path) |
| 2981 assert directory |
| 2982 # Do not use os.path.basename() so trailing os.path.sep is kept. |
| 2983 basename = path[len(directory)+1:] |
| 2984 files_in_directory = buckets.setdefault(directory, {}) |
| 2985 files_in_directory[basename] = fileobj |
| 2986 # Add all the directories recursively up to root. |
| 2987 while True: |
| 2988 old_d = directory |
| 2989 directory = os.path.dirname(directory) |
| 2990 if directory + os.path.sep == root_dir or directory == old_d: |
| 2991 break |
| 2992 buckets.setdefault(directory, {}) |
| 2993 |
| 2994 root_prefix = len(root_dir) + 1 if root_dir else 0 |
| 2995 for directory in sorted(buckets, reverse=True): |
| 2996 actual = set(f for f in os.listdir(directory) if not blacklist(f)) |
| 2997 expected = set(buckets[directory]) |
| 2998 if not (actual - expected): |
| 2999 parent = os.path.dirname(directory) |
| 3000 buckets[parent][os.path.basename(directory)] = Results.Directory( |
| 3001 root_dir, |
| 3002 directory[root_prefix:], |
| 3003 False, |
| 3004 sum(f.size for f in buckets[directory].itervalues()), |
| 3005 sum(f.nb_files for f in buckets[directory].itervalues())) |
| 3006 # Remove the whole bucket. |
| 3007 del buckets[directory] |
| 3008 |
| 3009 # Reverse the mapping with what remains. The original instances are returned, |
| 3010 # so the cached meta data is kept. |
| 3011 files = sum((x.values() for x in buckets.itervalues()), []) |
| 3012 return sorted(files, key=lambda x: x.path) |
| 3013 |
| 3014 |
| 3015 def trace(logfile, cmd, cwd, api, output): |
| 3016 """Traces an executable. Returns (returncode, output) from api. |
| 3017 |
| 3018 Arguments: |
| 3019 - logfile: file to write to. |
| 3020 - cmd: command to run. |
| 3021 - cwd: current directory to start the process in. |
| 3022 - api: a tracing api instance. |
| 3023 - output: if True, returns output, otherwise prints it at the console. |
| 3024 """ |
| 3025 cmd = fix_python_path(cmd) |
| 3026 api.clean_trace(logfile) |
| 3027 with api.get_tracer(logfile) as tracer: |
| 3028 return tracer.trace(cmd, cwd, 'default', output) |
| 3029 |
| 3030 |
| 3031 def load_trace(logfile, root_dir, api, blacklist): |
| 3032 """Loads a trace file and returns the Results instance. |
| 3033 |
| 3034 Arguments: |
| 3035 - logfile: File to load. |
| 3036 - root_dir: Root directory to use to determine if a file is relevant to the |
| 3037 trace or not. |
| 3038 - api: A tracing api instance. |
| 3039 - blacklist: Optional blacklist function to filter out unimportant files. |
| 3040 """ |
| 3041 data = api.parse_log(logfile, (blacklist or (lambda _: False))) |
| 3042 assert len(data) == 1, 'Expected exactly one trace, got %d' % len(data)
| 3043 if 'exception' in data[0]: |
| 3044 # It got an exception, raise it. |
| 3045 raise data[0]['exception'] |
| 3046 results = data[0]['results'] |
| 3047 if root_dir: |
| 3048 results = results.strip_root(root_dir) |
| 3049 return results |
| 3050 |
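| # Follow-up sketch (illustrative only): parse the log written by trace() above,
| # keeping only files under a hypothetical checkout directory.
| #   results = load_trace('/tmp/app.log', '/path/to/checkout', get_api(), None)
| #   print('\n'.join(f.path for f in results.existent))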
| 3051 |
| 3052 def CMDclean(args): |
| 3053 """Cleans up traces.""" |
| 3054 parser = OptionParserTraceInputs(command='clean') |
| 3055 options, args = parser.parse_args(args) |
| 3056 api = get_api() |
| 3057 api.clean_trace(options.log) |
| 3058 return 0 |
| 3059 |
| 3060 |
| 3061 def CMDtrace(args): |
| 3062 """Traces an executable.""" |
| 3063 parser = OptionParserTraceInputs(command='trace') |
| 3064 parser.allow_interspersed_args = False |
| 3065 parser.add_option( |
| 3066 '-q', '--quiet', action='store_true', |
| 3067 help='Redirects traced executable output to /dev/null') |
| 3068 options, args = parser.parse_args(args) |
| 3069 |
| 3070 if not args: |
| 3071 parser.error('Please provide a command to run') |
| 3072 |
| 3073 if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK): |
| 3074 args[0] = os.path.abspath(args[0]) |
| 3075 |
| 3076 api = get_api() |
| 3077 return trace(options.log, args, os.getcwd(), api, options.quiet)[0] |
| 3078 |
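| # Example invocation (hypothetical log path and executable); -l is required and
| # everything after the traced executable is passed to it verbatim:
| #   %prog trace -l /tmp/app.log ./app --some-flag
| # Pass -q to redirect the traced executable's output to /dev/null.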
| 3079 |
| 3080 def CMDread(args): |
| 3081 """Reads the logs and prints the result.""" |
| 3082 parser = OptionParserTraceInputs(command='read') |
| 3083 parser.add_option( |
| 3084 '-V', '--variable', |
| 3085 nargs=2, |
| 3086 action='append', |
| 3087 dest='variables', |
| 3088 metavar='VAR_NAME directory', |
| 3089 default=[], |
| 3090 help=('Variables to replace directory prefixes with in the results. Example: '
| 3091 '"-V \'$HOME\' \'/home/%s\'" will replace all occurrences of your '
| 3092 'home dir with $HOME') % getpass.getuser())
| 3093 parser.add_option( |
| 3094 '--root-dir', |
| 3095 help='Root directory to base everything off of. Anything outside of '
| 3096 'this directory will not be reported')
| 3097 parser.add_option( |
| 3098 '-j', '--json', action='store_true', |
| 3099 help='Outputs raw result data as json') |
| 3100 parser.add_option( |
| 3101 '-b', '--blacklist', action='append', default=[], |
| 3102 help='List of regexps to use as a blacklist filter')
| 3103 options, args = parser.parse_args(args) |
| 3104 |
| 3105 if options.root_dir: |
| 3106 options.root_dir = os.path.abspath(options.root_dir) |
| 3107 |
| 3108 variables = dict(options.variables) |
| 3109 api = get_api() |
| 3110 def blacklist(f): |
| 3111 return any(re.match(b, f) for b in options.blacklist) |
| 3112 data = api.parse_log(options.log, blacklist) |
| 3113 # Process each trace. |
| 3114 output_as_json = [] |
| 3115 for item in data: |
| 3116 if 'exception' in item: |
| 3117 print >> sys.stderr, ( |
| 3118 'Trace %s: Got an exception: %s' % (item['trace'], item['exception'])) |
| 3119 continue |
| 3120 results = item['results'] |
| 3121 if options.root_dir: |
| 3122 results = results.strip_root(options.root_dir) |
| 3123 |
| 3124 if options.json: |
| 3125 output_as_json.append(results.flatten()) |
| 3126 else: |
| 3127 simplified = extract_directories( |
| 3128 options.root_dir, results.files, blacklist) |
| 3129 simplified = [f.replace_variables(variables) for f in simplified] |
| 3130 if len(data) > 1: |
| 3131 print('Trace: %s' % item['trace']) |
| 3132 print('Total: %d' % len(results.files)) |
| 3133 print('Non existent: %d' % len(results.non_existent)) |
| 3134 for f in results.non_existent: |
| 3135 print(' %s' % f.path) |
| 3136 print( |
| 3137 'Interesting: %d reduced to %d' % ( |
| 3138 len(results.existent), len(simplified))) |
| 3139 for f in simplified: |
| 3140 print(' %s' % f.path) |
| 3141 |
| 3142 if options.json: |
| 3143 write_json(sys.stdout, output_as_json, False) |
| 3144 return 0 |
| 3145 |
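| # Example invocations (paths are hypothetical), combining the flags defined
| # above:
| #   %prog read -l /tmp/app.log --root-dir /path/to/checkout
| #   %prog read -l /tmp/app.log -j -b '.+\.pyc'
| #   %prog read -l /tmp/app.log -V '$HOME' /home/user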
| 3146 |
| 3147 class OptionParserWithLogging(optparse.OptionParser): |
| 3148 """Adds --verbose option.""" |
| 3149 def __init__(self, verbose=0, **kwargs): |
| 3150 optparse.OptionParser.__init__(self, **kwargs) |
| 3151 self.add_option( |
| 3152 '-v', '--verbose', |
| 3153 action='count', |
| 3154 default=verbose, |
| 3155 help='Use multiple times to increase verbosity') |
| 3156 |
| 3157 def parse_args(self, *args, **kwargs): |
| 3158 options, args = optparse.OptionParser.parse_args(self, *args, **kwargs) |
| 3159 levels = [logging.ERROR, logging.INFO, logging.DEBUG] |
| 3160 logging.basicConfig( |
| 3161 level=levels[min(len(levels)-1, options.verbose)], |
| 3162 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s') |
| 3163 return options, args |
| 3164 |
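| # Illustrative mapping of the --verbose count onto the `levels` list above:
| #   parser = OptionParserWithLogging()
| #   parser.parse_args(['-v', '-v'])  # 0 -> ERROR, 1 -> INFO, 2 or more -> DEBUG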
| 3165 |
| 3166 class OptionParserWithNiceDescription(OptionParserWithLogging): |
| 3167 """Generates the description with the command's docstring.""" |
| 3168 def __init__(self, **kwargs): |
| 3169 """Sets 'description' and 'usage' if not already specified.""" |
| 3170 command = kwargs.pop('command', 'help') |
| 3171 kwargs.setdefault( |
| 3172 'description', |
| 3173 re.sub('[\r\n ]{2,}', ' ', get_command_handler(command).__doc__)) |
| 3174 kwargs.setdefault('usage', '%%prog %s [options]' % command) |
| 3175 OptionParserWithLogging.__init__(self, **kwargs) |
| 3176 |
| 3177 |
| 3178 class OptionParserTraceInputs(OptionParserWithNiceDescription): |
| 3179 """Adds automatic --log handling.""" |
| 3180 def __init__(self, **kwargs): |
| 3181 OptionParserWithNiceDescription.__init__(self, **kwargs) |
| 3182 self.add_option( |
| 3183 '-l', '--log', help='Log file to generate or read, required') |
| 3184 |
| 3185 def parse_args(self, *args, **kwargs): |
| 3186 """Makes sure the paths make sense. |
| 3187 |
| 3188 On Windows, / and \ are often mixed together in a path. |
| 3189 """ |
| 3190 options, args = OptionParserWithNiceDescription.parse_args( |
| 3191 self, *args, **kwargs) |
| 3192 if not options.log: |
| 3193 self.error('Must supply a log file with -l') |
| 3194 options.log = os.path.abspath(options.log) |
| 3195 return options, args |
| 3196 |
| 3197 |
| 3198 def extract_documentation(): |
| 3199 """Returns a dict {command: description} for each of documented command.""" |
| 3200 commands = ( |
| 3201 fn[3:] |
| 3202 for fn in dir(sys.modules['__main__']) |
| 3203 if fn.startswith('CMD') and get_command_handler(fn[3:]).__doc__) |
| 3204 return dict((fn, get_command_handler(fn).__doc__) for fn in commands) |
| 3205 |
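| # For this module the resulting dict (sketch) maps each CMD* handler defined
| # here to its docstring, e.g.:
| #   {'clean': 'Cleans up traces.', 'trace': 'Traces an executable.', ...}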
| 3206 |
| 3207 def CMDhelp(args): |
| 3208 """Prints list of commands or help for a specific command.""" |
| 3209 doc = extract_documentation() |
| 3210 # Calculates the optimal offset. |
| 3211 offset = max(len(cmd) for cmd in doc) |
| 3212 format_str = ' %-' + str(offset + 2) + 's %s' |
| 3213 # Generates a one-line description of each command.
| 3214 commands_description = '\n'.join( |
| 3215 format_str % (cmd, doc[cmd].split('\n')[0]) for cmd in sorted(doc)) |
| 3216 |
| 3217 parser = OptionParserWithNiceDescription( |
| 3218 usage='%prog <command> [options]', |
| 3219 description='Commands are:\n%s\n' % commands_description) |
| 3220 parser.format_description = lambda _: parser.description |
| 3221 |
| 3222 # Strip out any -h or --help argument. |
| 3223 _, args = parser.parse_args([i for i in args if i not in ('-h', '--help')])
| 3224 if len(args) == 1: |
| 3225 if not get_command_handler(args[0]): |
| 3226 parser.error('Unknown command %s' % args[0]) |
| 3227 # The command was "%prog help command"; re-run as "%prog command --help"
| 3228 # so the command's help is printed correctly.
| 3229 return main(args + ['--help']) |
| 3230 elif args: |
| 3231 parser.error('Unknown argument "%s"' % ' '.join(args)) |
| 3232 parser.print_help() |
| 3233 return 0 |
| 3234 |
| 3235 |
| 3236 def get_command_handler(name): |
| 3237 """Returns the command handler or CMDhelp if it doesn't exist.""" |
| 3238 return getattr(sys.modules['__main__'], 'CMD%s' % name, None) |
| 3239 |
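| # Illustrative lookups against the CMD* functions defined in this module:
| #   get_command_handler('trace')  # -> CMDtrace
| #   get_command_handler('bogus')  # -> None (hypothetical, unknown command)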
| 3240 |
| 3241 def main_impl(argv): |
| 3242 command = get_command_handler(argv[0] if argv else 'help') |
| 3243 if not command: |
| 3244 return CMDhelp(argv) |
| 3245 return command(argv[1:]) |
| 3246 |
| 3247 def main(argv): |
| 3248 try: |
| 3249 return main_impl(argv)
| 3250 except TracingFailure, e: |
| 3251 sys.stderr.write('\nError: ') |
| 3252 sys.stderr.write(str(e)) |
| 3253 sys.stderr.write('\n') |
| 3254 return 1 |
| 3255 |
| 3256 |
| 3257 if __name__ == '__main__': |
| 3258 sys.exit(main(sys.argv[1:])) |