OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # coding=utf-8 | |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
4 # Use of this source code is governed by a BSD-style license that can be | |
5 # found in the LICENSE file. | |
6 | |
7 """Traces an executable and its child processes and extract the files accessed | |
8 by them. | |
9 | |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | |
11 interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise. | |
12 The OS-specific implementation is hidden in an 'API' interface. | |
13 | |
14 The results are embedded in a Results instance. The tracing is done in two | |
15 phases, the first is to do the actual trace and generate an | |
16 implementation-specific log file. Then the log file is parsed to extract the | |
17 information, including the individual child processes and the files accessed | |
18 from the log. | |
19 """ | |
20 | |
21 import codecs | |
22 import csv | |
23 import errno | |
24 import getpass | |
25 import glob | |
26 import json | |
27 import logging | |
28 import os | |
29 import re | |
30 import stat | |
31 import subprocess | |
32 import sys | |
33 import tempfile | |
34 import threading | |
35 import time | |
36 import weakref | |
37 | |
38 from third_party import colorama | |
39 from third_party.depot_tools import fix_encoding | |
40 from third_party.depot_tools import subcommand | |
41 | |
42 from utils import file_path | |
43 from utils import tools | |
44 | |
45 ## OS-specific imports | |
46 | |
47 if sys.platform == 'win32': | |
48 from ctypes.wintypes import byref, c_int, c_wchar_p | |
49 from ctypes.wintypes import windll # pylint: disable=E0611 | |
50 | |
51 | |
52 __version__ = '0.1' | |
53 | |
54 | |
55 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | |
56 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) | |
57 | |
58 | |
class TracingFailure(Exception):
  """An exception occured during tracing.

  Carries the offending pid, line number and log line when available, plus any
  extra implementation-specific context in self.extra.
  """
  def __init__(self, description, pid, line_number, line, *args):
    super(TracingFailure, self).__init__(
        description, pid, line_number, line, *args)
    self.description = description
    self.pid = pid
    self.line_number = line_number
    self.line = line
    self.extra = args

  def __str__(self):
    # Assemble one section per piece of available context, then join them.
    sections = [self.description]
    if self.pid:
      sections.append('pid: %d' % self.pid)
    if self.line_number:
      sections.append('line: %d' % self.line_number)
    if self.line:
      sections.append('%s' % self.line)
    if self.extra:
      sections.append(', '.join(str(i) for i in self.extra if i))
    return '\n'.join(sections)
81 | |
82 | |
83 ## OS-specific functions | |
84 | |
if sys.platform == 'win32':
  def get_current_encoding():
    """Returns the 'ANSI' code page associated to the process."""
    return 'cp%d' % int(windll.kernel32.GetACP())


  def CommandLineToArgvW(command_line):
    """Splits a commandline into argv using CommandLineToArgvW().

    Returns the parsed arguments as a list of unicode strings.
    """
    # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
    size = c_int()
    # The Win32 API only accepts a unicode command line.
    assert isinstance(command_line, unicode)
    ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size))
    try:
      # 'size' was filled in with the number of arguments; copy them out of
      # the natively allocated array before releasing it.
      return [arg for arg in (c_wchar_p * size.value).from_address(ptr)]
    finally:
      # Per the API contract, the caller must free the returned array with
      # LocalFree().
      windll.kernel32.LocalFree(ptr)
101 | |
102 | |
103 | |
def gen_blacklist(regexes):
  """Returns a predicate usable as a blacklist.

  The predicate returns True when its path argument matches any of |regexes|.
  Patterns are compiled once, up front.
  """
  compiled_patterns = [re.compile(regex) for regex in regexes]

  def is_blacklisted(path):
    return any(pattern.match(path) for pattern in compiled_patterns)

  return is_blacklisted
110 | |
111 | |
def create_subprocess_thunk():
  """Creates a small temporary script to start the child process.

  This thunk doesn't block, its unique name is used to identify it as the
  parent.
  """
  # The generated script delegates to subprocess: everything from argv[2]
  # onward is the actual command, and its exit code is propagated.
  content = (
      'import subprocess, sys\n'
      'sys.exit(subprocess.call(sys.argv[2:]))\n')
  handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
  try:
    os.write(handle, content)
  finally:
    os.close(handle)
  return name
129 | |
130 | |
def create_exec_thunk():
  """Creates a small temporary script to start the child executable.

  Reads from the file handle provided as the first argument to block, then
  execv() the command to be traced.
  """
  content = (
      'import os, sys\n'
      'fd = int(sys.argv[1])\n'
      # This will block until the controlling process writes a byte on the
      # pipe. It will do so once the tracing tool, e.g. strace, is ready to
      # trace.
      'os.read(fd, 1)\n'
      'os.close(fd)\n'
      'os.execv(sys.argv[2], sys.argv[2:])\n')
  handle, name = tempfile.mkstemp(prefix='trace_inputs_thunk', suffix='.py')
  try:
    os.write(handle, content)
  finally:
    os.close(handle)
  return name
154 | |
155 | |
def strace_process_quoted_arguments(text):
  """Extracts quoted arguments on a string and return the arguments as a list.

  Implemented as an automaton. Supports incomplete strings in the form
  '"foo"...'.

  Example:
    With text = '"foo", "bar"', the function will return ['foo', 'bar']

  Raises ValueError on an unexpected character or an incorrectly terminated
  string.

  TODO(maruel): Implement escaping.
  """
  # All the possible states of the DFA.
  ( NEED_QUOTE,         # Begining of a new arguments.
    INSIDE_STRING,      # Inside an argument.
    ESCAPED,            # Found a '\' inside a quote. Treat the next char as-is.
    NEED_COMMA_OR_DOT,  # Right after the closing quote of an argument. Could be
                        # a serie of 3 dots or a comma.
    NEED_SPACE,         # Right after a comma.
    NEED_DOT_2,         # Found a dot, need a second one.
    NEED_DOT_3,         # Found second dot, need a third one.
    NEED_COMMA,         # Found third dot, need a comma.
  ) = range(8)

  state = NEED_QUOTE
  out = []

  def fail(char, index):
    """Raises on an unexpected character.

    Keeps the exact exception arguments the callers historically relied on:
    (formatted message, column index, full text).
    """
    raise ValueError(
        'Can\'t process char \'%s\' at column %d for: %r' % (
          char, index, text),
        index,
        text)

  for index, char in enumerate(text):
    if char == '"':
      if state == NEED_QUOTE:
        state = INSIDE_STRING
        # A new argument was found.
        out.append('')
      elif state == INSIDE_STRING:
        # The argument is now closed.
        state = NEED_COMMA_OR_DOT
      elif state == ESCAPED:
        out[-1] += char
        state = INSIDE_STRING
      else:
        fail(char, index)
    elif char == ',':
      if state in (NEED_COMMA_OR_DOT, NEED_COMMA):
        state = NEED_SPACE
      elif state == INSIDE_STRING:
        out[-1] += char
      elif state == ESCAPED:
        out[-1] += char
        state = INSIDE_STRING
      else:
        fail(char, index)
    elif char == ' ':
      if state == NEED_SPACE:
        state = NEED_QUOTE
      elif state == INSIDE_STRING:
        out[-1] += char
      elif state == ESCAPED:
        out[-1] += char
        state = INSIDE_STRING
      else:
        fail(char, index)
    elif char == '.':
      if state in (NEED_QUOTE, NEED_COMMA_OR_DOT):
        # The string is incomplete, this mean the strace -s flag should be
        # increased.
        # For NEED_QUOTE, the input string would look like '"foo", ...'.
        # For NEED_COMMA_OR_DOT, the input string would look like '"foo"...'
        state = NEED_DOT_2
      elif state == NEED_DOT_2:
        state = NEED_DOT_3
      elif state == NEED_DOT_3:
        state = NEED_COMMA
      elif state == INSIDE_STRING:
        out[-1] += char
      elif state == ESCAPED:
        out[-1] += char
        state = INSIDE_STRING
      else:
        fail(char, index)
    elif char == '\\':
      if state == ESCAPED:
        out[-1] += char
        state = INSIDE_STRING
      elif state == INSIDE_STRING:
        state = ESCAPED
      else:
        fail(char, index)
    else:
      if state == INSIDE_STRING:
        out[-1] += char
      else:
        fail(char, index)
  if state not in (NEED_COMMA, NEED_COMMA_OR_DOT):
    raise ValueError(
        'String is incorrectly terminated: %r' % text,
        text)
  return out
275 | |
276 | |
def read_json(filepath):
  """Returns the content of the file at |filepath| parsed as JSON."""
  with open(filepath, 'r') as handle:
    return json.load(handle)
280 | |
281 | |
def write_json(filepath_or_handle, data, dense):
  """Writes data into filepath or file handle encoded as json.

  If dense is True, the json is packed. Otherwise, it is human readable.
  """
  # Build the serialization options once instead of repeating them at each
  # call site.
  if dense:
    kwargs = {'sort_keys': True, 'separators': (',', ':')}
  else:
    kwargs = {'sort_keys': True, 'indent': 2}
  if hasattr(filepath_or_handle, 'write'):
    filepath_or_handle.write(json.dumps(data, **kwargs))
  else:
    with open(filepath_or_handle, 'wb') as f:
      json.dump(data, f, **kwargs)
299 | |
300 | |
def assert_is_renderable(pseudo_string):
  """Asserts the input is a valid object to be processed by render().

  Accepted inputs: None, a unicode string, or any object exposing a render()
  method.
  """
  is_valid = (
      pseudo_string is None or
      isinstance(pseudo_string, unicode) or
      hasattr(pseudo_string, 'render'))
  assert is_valid, repr(pseudo_string)
307 | |
308 | |
def render(pseudo_string):
  """Resolves a pseudo-string into an actual unicode string.

  None and unicode instances pass through untouched; anything else is expected
  to expose a render() method that returns the late-bound value.
  """
  if pseudo_string is not None and not isinstance(pseudo_string, unicode):
    return pseudo_string.render()
  return pseudo_string
314 | |
315 | |
class Results(object):
  """Results of a trace session."""

  class _TouchedObject(object):
    """Something, a file or a directory, that was accessed."""
    def __init__(self, root, path, tainted, size, nb_files):
      """Arguments:
      - root: root directory the item is relative to, or None/'' when |path|
        is absolute.
      - path: path of the item, relative to |root| if |root| is set.
      - tainted: True once |path| contains a variable instead of a real path.
      - size: size in bytes, None to compute lazily, -1 when non existent.
      - nb_files: number of files represented by this item.
      """
      logging.debug(
          '%s(%s, %s, %s, %s, %s)' %
          (self.__class__.__name__, root, path, tainted, size, nb_files))
      assert_is_renderable(root)
      assert_is_renderable(path)
      self.root = root
      self.path = path
      self.tainted = tainted
      self.nb_files = nb_files
      # Can be used as a cache or a default value, depending on context. In
      # particular, once self.tainted is True, because the path was replaced
      # with a variable, it is not possible to look up the file size.
      self._size = size
      # These are cache only.
      self._real_path = None

      # Check internal consistency.
      assert path, path
      assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path)
      assert tainted or (
          not os.path.exists(self.full_path) or
          (self.full_path == file_path.get_native_path_case(self.full_path))), (
              tainted,
              self.full_path,
              file_path.get_native_path_case(self.full_path))

    @property
    def existent(self):
      """True when the item exists on disk (its size is not -1)."""
      return self.size != -1

    @property
    def full_path(self):
      """Absolute path of the item; joins root and path when root is set."""
      if self.root:
        return os.path.join(self.root, self.path)
      return self.path

    @property
    def real_path(self):
      """Returns the path with symlinks resolved."""
      if not self._real_path:
        self._real_path = os.path.realpath(self.full_path)
      return self._real_path

    @property
    def size(self):
      """File's size. -1 is not existent.

      Once tainted, it is not possible the retrieve the file size anymore since
      the path is composed of variables.
      """
      if self._size is None and not self.tainted:
        try:
          self._size = os.stat(self.full_path).st_size
        except OSError:
          self._size = -1
      return self._size

    def flatten(self):
      """Returns a dict representing this object.

      A 'size' of 0 means the file was only touched and not read.
      """
      return {
        'path': self.path,
        'size': self.size,
      }

    def replace_variables(self, variables):
      """Replaces the root of this File with one of the variables if it matches.

      If a variable replacement occurs, the cloned object becomes tainted.
      """
      for variable, root_path in variables.iteritems():
        if self.path.startswith(root_path):
          return self._clone(
              self.root, variable + self.path[len(root_path):], True)
      # No need to clone, returns ourself.
      return self

    def strip_root(self, root):
      """Returns a clone of itself with 'root' stripped off.

      Note that the file is kept if it is either accessible from a symlinked
      path that was used to access the file or through the real path.
      """
      # Check internal consistency.
      assert (
          self.tainted or
          (file_path.isabs(root) and root.endswith(os.path.sep))), root
      if not self.full_path.startswith(root):
        # Now try to resolve the symlinks to see if it can be reached this way.
        # Only try *after* trying without resolving symlink.
        if not self.real_path.startswith(root):
          return None
        path = self.real_path
      else:
        path = self.full_path
      return self._clone(root, path[len(root):], self.tainted)

    def _clone(self, new_root, new_path, tainted):
      # Subclasses must implement cloning since they carry extra meta-data.
      raise NotImplementedError(self.__class__.__name__)

  class File(_TouchedObject):
    """A file that was accessed. May not be present anymore.

    If tainted is true, it means it is not a real path anymore as a variable
    replacement occured.

    |mode| can be one of None, TOUCHED, READ or WRITE.
    """
    # Was probed for existence, and it is existent, but was never _opened_.
    TOUCHED = 't'
    # Opened for read only and guaranteed to not have been written to.
    READ = 'r'
    # Opened for write.
    WRITE = 'w'

    # They are listed in order of priority. E.g. if a file is traced as TOUCHED
    # then as WRITE, only keep WRITE. None means no idea, which is a problem on
    # Windows.
    ACCEPTABLE_MODES = (None, TOUCHED, READ, WRITE)

    def __init__(self, root, path, tainted, size, mode):
      assert mode in self.ACCEPTABLE_MODES
      super(Results.File, self).__init__(root, path, tainted, size, 1)
      self.mode = mode

    def _clone(self, new_root, new_path, tainted):
      """Clones itself keeping meta-data."""
      # Keep the self.size and self._real_path caches for performance reason. It
      # is also important when the file becomes tainted (with a variable instead
      # of the real path) since self.path is not an on-disk path anymore so
      # out._size cannot be updated.
      out = self.__class__(new_root, new_path, tainted, self.size, self.mode)
      out._real_path = self._real_path
      return out

    def flatten(self):
      """Returns the dict representation, including the access mode."""
      out = super(Results.File, self).flatten()
      out['mode'] = self.mode
      return out

  class Directory(_TouchedObject):
    """A directory of files. Must exist.

    For a Directory instance, self.size is not a cache, it's an actual value
    that is never modified and represents the total size of the files contained
    in this directory. It is possible that the directory is empty so that
    size==0; this happens if there's only an invalid symlink in it.
    """
    def __init__(self, root, path, tainted, size, nb_files):
      """path='.' is a valid value and must be handled appropriately."""
      assert not path.endswith(os.path.sep), path
      super(Results.Directory, self).__init__(
          root, path + os.path.sep, tainted, size, nb_files)

    def flatten(self):
      """Returns the dict representation, including the file count."""
      out = super(Results.Directory, self).flatten()
      out['nb_files'] = self.nb_files
      return out

    def _clone(self, new_root, new_path, tainted):
      """Clones itself keeping meta-data."""
      out = self.__class__(
          new_root,
          new_path.rstrip(os.path.sep),
          tainted,
          self.size,
          self.nb_files)
      out._real_path = self._real_path
      return out

  class Process(object):
    """A process that was traced.

    Contains references to the files accessed by this process and its children.
    """
    def __init__(self, pid, files, executable, command, initial_cwd, children):
      logging.debug('Process(%s, %d, ...)' % (pid, len(files)))
      self.pid = pid
      # Files are kept sorted by path for deterministic output.
      self.files = sorted(files, key=lambda x: x.path)
      self.children = children
      self.executable = executable
      self.command = command
      self.initial_cwd = initial_cwd

      # Check internal consistency.
      assert len(set(f.path for f in self.files)) == len(self.files), sorted(
          f.path for f in self.files)
      assert isinstance(self.children, list)
      assert isinstance(self.files, list)

    @property
    def all(self):
      """Iterates over this process and all its children, depth first."""
      for child in self.children:
        for i in child.all:
          yield i
      yield self

    def flatten(self):
      """Returns a dict representation of the whole process subtree."""
      return {
        'children': [c.flatten() for c in self.children],
        'command': self.command,
        'executable': self.executable,
        'files': [f.flatten() for f in self.files],
        'initial_cwd': self.initial_cwd,
        'pid': self.pid,
      }

    def strip_root(self, root):
      """Returns a clone with only the files under |root|, recursively."""
      assert file_path.isabs(root) and root.endswith(os.path.sep), root
      # Loads the files after since they are constructed as objects.
      out = self.__class__(
          self.pid,
          filter(None, (f.strip_root(root) for f in self.files)),
          self.executable,
          self.command,
          self.initial_cwd,
          [c.strip_root(root) for c in self.children])
      logging.debug(
          'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files)))
      return out

  def __init__(self, process):
    """|process| is the root Results.Process of the traced process tree."""
    self.process = process
    # Cache.
    self._files = None

  def flatten(self):
    """Returns a dict representation of the whole trace, for serialization."""
    return {
      'root': self.process.flatten(),
    }

  @property
  def files(self):
    """All the files accessed across the whole process tree, sorted by path."""
    if self._files is None:
      self._files = sorted(
          sum((p.files for p in self.process.all), []),
          key=lambda x: x.path)
    return self._files

  @property
  def existent(self):
    """Only the files that exist on disk."""
    return [f for f in self.files if f.existent]

  @property
  def non_existent(self):
    """Only the files that do not exist on disk."""
    return [f for f in self.files if not f.existent]

  def strip_root(self, root):
    """Returns a clone with all the files outside the directory |root| removed
    and converts all the path to be relative paths.

    It keeps files accessible through the |root| directory or that have been
    accessed through any symlink which points to the same directory.
    """
    # Resolve any symlink
    root = os.path.realpath(root)
    root = (
        file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep)
    logging.debug('strip_root(%s)' % root)
    return Results(self.process.strip_root(root))
584 | |
585 | |
class ApiBase(object):
  """OS-agnostic API to trace a process and its children."""
  class Context(object):
    """Processes one log line at a time and keeps the list of traced processes.

    The parsing is complicated by the fact that logs are traced out of order for
    strace but in-order for dtrace and logman. In addition, on Windows it is
    very frequent that processids are reused so a flat list cannot be used. But
    at the same time, it is impossible to faithfully construct a graph when the
    logs are processed out of order. So both a tree and a flat mapping are used,
    the tree is the real process tree, while the flat mapping stores the last
    valid process for the corresponding processid. For the strace case, the
    tree's head is guessed at the last moment.
    """
    class Process(object):
      """Keeps context for one traced child process.

      Logs all the files this process touched. Ignores directories.
      """
      def __init__(self, blacklist, pid, initial_cwd):
        # Check internal consistency.
        assert isinstance(pid, int), repr(pid)
        assert_is_renderable(initial_cwd)
        self.pid = pid
        # children are Process instances.
        self.children = []
        self.initial_cwd = initial_cwd
        self.cwd = None
        # Maps a filepath (possibly late-bound) to its access mode.
        self.files = {}
        self.executable = None
        self.command = None
        self._blacklist = blacklist

      def to_results_process(self):
        """Resolves file case sensitivity and or late-bound strings."""
        # When resolving files, it's normal to get dupe because a file could be
        # opened multiple times with different case. Resolve the deduplication
        # here.
        def fix_path(x):
          """Returns the native file path case.

          Converts late-bound strings.
          """
          if not x:
            # Do not convert None instance to 'None'.
            return x
          x = render(x)
          if os.path.isabs(x):
            # If the path is not absolute, which tends to happen occasionally on
            # Windows, it is not possible to get the native path case so ignore
            # that trace. It mostly happens for 'executable' value.
            x = file_path.get_native_path_case(x)
          return x

        def fix_and_blacklist_path(x, m):
          """Receives a tuple (filepath, mode) and processes filepath."""
          x = fix_path(x)
          if not x:
            return
          # The blacklist needs to be reapplied, since path casing could
          # influence blacklisting.
          if self._blacklist(x):
            return
          # Filters out directories. Some may have passed through.
          if os.path.isdir(x):
            return
          return x, m

        # Renders all the files as strings, as some could be RelativePath
        # instances. It is important to do it first since there could still be
        # multiple entries with the same path but different modes.
        rendered = (
            fix_and_blacklist_path(f, m) for f, m in self.files.iteritems())
        files = sorted(
            (f for f in rendered if f),
            key=lambda x: (x[0], Results.File.ACCEPTABLE_MODES.index(x[1])))
        # Then converting into a dict will automatically clean up lesser
        # important values.
        files = [
          Results.File(None, f, False, None, m)
          for f, m in dict(files).iteritems()
        ]
        return Results.Process(
            self.pid,
            files,
            fix_path(self.executable),
            self.command,
            fix_path(self.initial_cwd),
            [c.to_results_process() for c in self.children])

      def add_file(self, filepath, mode):
        """Adds a file if it passes the blacklist."""
        if self._blacklist(render(filepath)):
          return
        logging.debug('add_file(%d, %s, %s)', self.pid, filepath, mode)
        # Note that filepath and not render(filepath) is added. It is because
        # filepath could be something else than a string, like a RelativePath
        # instance for dtrace logs.
        modes = Results.File.ACCEPTABLE_MODES
        old_mode = self.files.setdefault(filepath, mode)
        if old_mode != mode and modes.index(old_mode) < modes.index(mode):
          # Take the highest value.
          self.files[filepath] = mode

    def __init__(self, blacklist):
      self.blacklist = blacklist
      # Initial process.
      self.root_process = None
      # dict to accelerate process lookup, to not have to lookup the whole graph
      # each time.
      self._process_lookup = {}

  class Tracer(object):
    """During it's lifetime, the tracing subsystem is enabled."""
    def __init__(self, logname):
      # Path of the json file that will receive the trace meta-data.
      self._logname = logname
      self._lock = threading.RLock()
      self._traces = []
      self._initialized = True
      # Temporary thunk scripts to delete on close().
      self._scripts_to_cleanup = []

    def trace(self, cmd, cwd, tracename, output):
      """Runs the OS-specific trace program on an executable.

      Arguments:
      - cmd: The command (a list) to run.
      - cwd: Current directory to start the child process in.
      - tracename: Name of the trace in the logname file.
      - output: If False, redirects output to PIPEs.

      Returns a tuple (resultcode, output) and updates the internal trace
      entries.
      """
      # The implementation adds an item to self._traces.
      raise NotImplementedError(self.__class__.__name__)

    def close(self, _timeout=None):
      """Saves the meta-data in the logname file.

      For kernel-based tracing, stops the tracing subsystem.

      Must not be used manually when using 'with' construct.
      """
      with self._lock:
        if not self._initialized:
          raise TracingFailure(
              'Called %s.close() on an unitialized object' %
                  self.__class__.__name__,
              None, None, None)
        try:
          # Best-effort removal of the thunk scripts; a failure to delete must
          # not prevent the meta-data from being written out.
          while self._scripts_to_cleanup:
            try:
              os.remove(self._scripts_to_cleanup.pop())
            except OSError as e:
              logging.error('Failed to delete a temporary script: %s', e)
          write_json(self._logname, self._gen_logdata(), False)
        finally:
          self._initialized = False

    def post_process_log(self):
      """Post-processes the log so it becomes faster to load afterward.

      Must not be used manually when using 'with' construct.
      """
      assert not self._initialized, 'Must stop tracing first.'

    def _gen_logdata(self):
      """Returns the data to be saved in the trace file."""
      return {
        'traces': self._traces,
      }

    def __enter__(self):
      """Enables 'with' statement."""
      return self

    def __exit__(self, exc_type, exc_value, traceback):
      """Enables 'with' statement."""
      self.close()
      # If an exception was thrown, do not process logs.
      if not exc_type:
        self.post_process_log()

  def get_tracer(self, logname):
    """Returns an ApiBase.Tracer instance.

    Initializes the tracing subsystem, which is a requirement for kernel-based
    tracers. Only one tracer instance should be live at a time!

    logname is the filepath to the json file that will contain the meta-data
    about the logs.
    """
    return self.Tracer(logname)

  @staticmethod
  def clean_trace(logname):
    """Deletes an old log."""
    raise NotImplementedError()

  @classmethod
  def parse_log(cls, logname, blacklist, trace_name):
    """Processes trace logs and returns the files opened and the files that do
    not exist.

    It does not track directories.

    Arguments:
      - logname: must be an absolute path.
      - blacklist: must be a lambda.
      - trace_name: optional trace to read, defaults to reading all traces.

    Most of the time, files that do not exist are temporary test files that
    should be put in /tmp instead. See http://crbug.com/116251.

    Returns a list of dict with keys:
      - results: A Results instance.
      - trace: The corresponding tracename parameter provided to
        get_tracer().trace().
      - output: Output gathered during execution, if get_tracer().trace(...,
        output=False) was used.
    """
    # On a classmethod, 'cls' is already the class; cls.__class__.__name__
    # would name the metaclass ('type'), not the implementation class.
    raise NotImplementedError(cls.__name__)
808 | |
809 | |
810 class Strace(ApiBase): | |
811 """strace implies linux.""" | |
  @staticmethod
  def load_filename(filename):
    """Parses a filename in a log.

    Expands the octal escapes ('\\ooo') strace emits for non-printable bytes
    back into raw characters, then decodes the result to unicode.
    """
    # TODO(maruel): Be compatible with strace -x.
    assert isinstance(filename, str)
    out = ''
    i = 0
    while i < len(filename):
      c = filename[i]
      if c == '\\':
        # '\ooo': three octal digits follow the backslash.
        out += chr(int(filename[i+1:i+4], 8))
        i += 4
      else:
        out += c
        i += 1
    # TODO(maruel): That's not necessarily true that the current code page is
    # utf-8.
    return out.decode('utf-8')
830 | |
831 class Context(ApiBase.Context): | |
832 """Processes a strace log line and keeps the list of existent and non | |
833 existent files accessed. | |
834 | |
835 Ignores directories. | |
836 | |
837 Uses late-binding to processes the cwd of each process. The problem is that | |
838 strace generates one log file per process it traced but doesn't give any | |
839 information about which process was started when and by who. So process the | |
840 logs out of order and use late binding with RelativePath to be able to | |
841 deduce the initial directory of each process once all the logs are parsed. | |
842 | |
843 TODO(maruel): Use the script even in the non-sudo case, so log parsing can | |
844 be done in two phase: first find the root process, then process the child | |
845 processes in order. With that, it should be possible to not use RelativePath | |
846 anymore. This would significantly simplify the code! | |
847 """ | |
    class Process(ApiBase.Context.Process):
      """Represents the state of a process.

      Contains all the information retrieved from the pid-specific log.
      """
      # Function names are using ([a-z_0-9]+)
      # This is the most common format. function(args) = result
      RE_HEADER = re.compile(r'^([a-z_0-9]+)\((.*?)\)\s+= (.+)$')
      # An interrupted function call, only grab the minimal header.
      RE_UNFINISHED = re.compile(r'^([^\(]+)(.*) \<unfinished \.\.\.\>$')
      # A resumed function call.
      RE_RESUMED = re.compile(r'^<\.\.\. ([^ ]+) resumed> (.+)$')
      # A process received a signal.
      RE_SIGNAL = re.compile(r'^--- SIG[A-Z]+ .+ ---')
      # A process didn't handle a signal. Ignore any junk appearing before,
      # because the process was forcibly killed so it won't open any new file.
      RE_KILLED = re.compile(
          r'^.*\+\+\+ killed by ([A-Z]+)( \(core dumped\))? \+\+\+$')
      # The process has exited.
      RE_PROCESS_EXITED = re.compile(r'^\+\+\+ exited with (\d+) \+\+\+')
      # A call was canceled. Ignore any prefix.
      RE_UNAVAILABLE = re.compile(r'^.*\)\s*= \? <unavailable>$')
      # Happens when strace fails to even get the function name.
      UNNAMED_FUNCTION = '????'
872 | |
      # Corner-case in python, a class member function decorator must not be
      # @staticmethod.
      def parse_args(regexp, expect_zero):  # pylint: disable=E0213
        """Automatically convert the str 'args' into a list of processed
        arguments.

        Arguments:
        - regexp is used to parse args.
        - expect_zero: one of True, False or None.
          - True: will check for result.startswith('0') first and will ignore
            the trace line completely otherwise. This is important because for
            many functions, the regexp will not process if the call failed.
          - False: will check for not result.startswith(('?', '-1')) for the
            same reason than with True.
          - None: ignore result.
        """
        def meta_hook(function):
          # The decorated methods follow the 'handle_<syscall>' naming scheme.
          assert function.__name__.startswith('handle_')
          def hook(self, args, result):
            if expect_zero is True and not result.startswith('0'):
              return
            if expect_zero is False and result.startswith(('?', '-1')):
              return
            match = re.match(regexp, args)
            if not match:
              raise TracingFailure(
                  'Failed to parse %s(%s) = %s' %
                      (function.__name__[len('handle_'):], args, result),
                  None, None, None)
            # The handler receives the already-split argument groups.
            return function(self, match.groups(), result)
          return hook
        return meta_hook
905 | |
906 class RelativePath(object): | |
907 """A late-bound relative path.""" | |
908 def __init__(self, parent, value): | |
909 assert_is_renderable(parent) | |
910 self.parent = parent | |
911 assert ( | |
912 value is None or | |
913 (isinstance(value, unicode) and not os.path.isabs(value))) | |
914 self.value = value | |
915 if self.value: | |
916 # TODO(maruel): On POSIX, '\\' is a valid character so remove this | |
917 # assert. | |
918 assert '\\' not in self.value, value | |
919 assert '\\' not in self.value, (repr(value), repr(self.value)) | |
920 | |
921 def render(self): | |
922 """Returns the current directory this instance is representing. | |
923 | |
924 This function is used to return the late-bound value. | |
925 """ | |
926 assert self.parent is not None | |
927 parent = render(self.parent) | |
928 if self.value: | |
929 return os.path.normpath(os.path.join(parent, self.value)) | |
930 return parent | |
931 | |
932 def __init__(self, root, pid): | |
933 """Keeps enough information to be able to guess the original process | |
934 root. | |
935 | |
936 strace doesn't store which process was the initial process. So more | |
937 information needs to be kept so the graph can be reconstructed from the | |
938 flat map. | |
939 """ | |
940 logging.info('%s(%d)' % (self.__class__.__name__, pid)) | |
941 super(Strace.Context.Process, self).__init__(root.blacklist, pid, None) | |
942 assert isinstance(root, ApiBase.Context) | |
943 self._root = weakref.ref(root) | |
944 # The dict key is the function name of the pending call, like 'open' | |
945 # or 'execve'. | |
946 self._pending_calls = {} | |
947 self._line_number = 0 | |
948 # Current directory when the process started. | |
949 if isinstance(self._root(), unicode): | |
950 self.initial_cwd = self._root() | |
951 else: | |
952 self.initial_cwd = self.RelativePath(self._root(), None) | |
953 self.parentid = None | |
954 self._done = False | |
955 | |
956 def get_cwd(self): | |
957 """Returns the best known value of cwd.""" | |
958 return self.cwd or self.initial_cwd | |
959 | |
960 def render(self): | |
961 """Returns the string value of the RelativePath() object. | |
962 | |
963 Used by RelativePath. Returns the initial directory and not the | |
964 current one since the current directory 'cwd' validity is time-limited. | |
965 | |
966 The validity is only guaranteed once all the logs are processed. | |
967 """ | |
968 return self.initial_cwd.render() | |
969 | |
970 def on_line(self, line): | |
971 assert isinstance(line, str) | |
972 self._line_number += 1 | |
973 try: | |
974 if self._done: | |
975 raise TracingFailure( | |
976 'Found a trace for a terminated process or corrupted log', | |
977 None, None, None) | |
978 | |
979 if self.RE_SIGNAL.match(line): | |
980 # Ignore signals. | |
981 return | |
982 | |
983 match = self.RE_KILLED.match(line) | |
984 if match: | |
985 # Converts a '+++ killed by Foo +++' trace into an exit_group(). | |
986 self.handle_exit_group(match.group(1), None) | |
987 return | |
988 | |
989 match = self.RE_PROCESS_EXITED.match(line) | |
990 if match: | |
991 # Converts a '+++ exited with 1 +++' trace into an exit_group() | |
992 self.handle_exit_group(match.group(1), None) | |
993 return | |
994 | |
995 match = self.RE_UNFINISHED.match(line) | |
996 if match: | |
997 if match.group(1) in self._pending_calls: | |
998 raise TracingFailure( | |
999 'Found two unfinished calls for the same function', | |
1000 None, None, None, | |
1001 self._pending_calls) | |
1002 self._pending_calls[match.group(1)] = ( | |
1003 match.group(1) + match.group(2)) | |
1004 return | |
1005 | |
1006 match = self.RE_UNAVAILABLE.match(line) | |
1007 if match: | |
1008 # This usually means a process was killed and a pending call was | |
1009 # canceled. | |
1010 # TODO(maruel): Look up the last exit_group() trace just above and | |
1011 # make sure any self._pending_calls[anything] is properly flushed. | |
1012 return | |
1013 | |
1014 match = self.RE_RESUMED.match(line) | |
1015 if match: | |
1016 if match.group(1) not in self._pending_calls: | |
1017 raise TracingFailure( | |
1018 'Found a resumed call that was not logged as unfinished', | |
1019 None, None, None, | |
1020 self._pending_calls) | |
1021 pending = self._pending_calls.pop(match.group(1)) | |
1022 # Reconstruct the line. | |
1023 line = pending + match.group(2) | |
1024 | |
1025 match = self.RE_HEADER.match(line) | |
1026 if not match: | |
1027 # The line is corrupted. It happens occasionally when a process is | |
1028 # killed forcibly with activity going on. Assume the process died. | |
1029 # No other line can be processed afterward. | |
1030 logging.debug('%d is done: %s', self.pid, line) | |
1031 self._done = True | |
1032 return | |
1033 | |
1034 if match.group(1) == self.UNNAMED_FUNCTION: | |
1035 return | |
1036 | |
1037 # It's a valid line, handle it. | |
1038 handler = getattr(self, 'handle_%s' % match.group(1), None) | |
1039 if not handler: | |
1040 self._handle_unknown(match.group(1), match.group(2), match.group(3)) | |
1041 return handler(match.group(2), match.group(3)) | |
1042 except TracingFailure, e: | |
1043 # Hack in the values since the handler could be a static function. | |
1044 e.pid = self.pid | |
1045 e.line = line | |
1046 e.line_number = self._line_number | |
1047 # Re-raise the modified exception. | |
1048 raise | |
1049 except (KeyError, NotImplementedError, ValueError), e: | |
1050 raise TracingFailure( | |
1051 'Trace generated a %s exception: %s' % ( | |
1052 e.__class__.__name__, str(e)), | |
1053 self.pid, | |
1054 self._line_number, | |
1055 line, | |
1056 e) | |
1057 | |
1058 @parse_args(r'^\"(.+?)\", [FKORWX_|]+$', True) | |
1059 def handle_access(self, args, _result): | |
1060 self._handle_file(args[0], Results.File.TOUCHED) | |
1061 | |
1062 @parse_args(r'^\"(.+?)\"$', True) | |
1063 def handle_chdir(self, args, _result): | |
1064 """Updates cwd.""" | |
1065 self.cwd = self._mangle(args[0]) | |
1066 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd)) | |
1067 | |
1068 @parse_args(r'^\"(.+?)\", (\d+), (\d+)$', False) | |
1069 def handle_chown(self, args, _result): | |
1070 # TODO(maruel): Look at result? | |
1071 self._handle_file(args[0], Results.File.WRITE) | |
1072 | |
1073 def handle_clone(self, _args, result): | |
1074 self._handling_forking('clone', result) | |
1075 | |
1076 def handle_close(self, _args, _result): | |
1077 pass | |
1078 | |
1079 @parse_args(r'^\"(.+?)\", (\d+)$', False) | |
1080 def handle_chmod(self, args, _result): | |
1081 self._handle_file(args[0], Results.File.WRITE) | |
1082 | |
1083 @parse_args(r'^\"(.+?)\", (\d+)$', False) | |
1084 def handle_creat(self, args, _result): | |
1085 self._handle_file(args[0], Results.File.WRITE) | |
1086 | |
1087 @parse_args(r'^\"(.+?)\", \[(.+)\], \[\/\* \d+ vars? \*\/\]$', True) | |
1088 def handle_execve(self, args, _result): | |
1089 # Even if in practice execve() doesn't returns when it succeeds, strace | |
1090 # still prints '0' as the result. | |
1091 filepath = args[0] | |
1092 self._handle_file(filepath, Results.File.READ) | |
1093 self.executable = self._mangle(filepath) | |
1094 try: | |
1095 self.command = strace_process_quoted_arguments(args[1]) | |
1096 except ValueError as e: | |
1097 raise TracingFailure( | |
1098 'Failed to process command line argument:\n%s' % e.args[0], | |
1099 None, None, None) | |
1100 | |
1101 def handle_exit_group(self, _args, _result): | |
1102 """Removes cwd.""" | |
1103 self.cwd = None | |
1104 | |
1105 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', True) | |
1106 def handle_faccessat(self, args, _results): | |
1107 if args[0] == 'AT_FDCWD': | |
1108 self._handle_file(args[1], Results.File.TOUCHED) | |
1109 else: | |
1110 raise Exception('Relative faccess not implemented.') | |
1111 | |
1112 def handle_fallocate(self, _args, result): | |
1113 pass | |
1114 | |
1115 def handle_fork(self, args, result): | |
1116 self._handle_unknown('fork', args, result) | |
1117 | |
1118 def handle_futex(self, _args, _result): | |
1119 pass | |
1120 | |
1121 @parse_args(r'^\"(.+?)\", (\d+)$', False) | |
1122 def handle_getcwd(self, args, _result): | |
1123 if os.path.isabs(args[0]): | |
1124 logging.debug('handle_chdir(%d, %s)' % (self.pid, self.cwd)) | |
1125 if not isinstance(self.cwd, unicode): | |
1126 # Take the occasion to reset the path. | |
1127 self.cwd = self._mangle(args[0]) | |
1128 else: | |
1129 # It should always match. | |
1130 assert self.cwd == Strace.load_filename(args[0]), ( | |
1131 self.cwd, args[0]) | |
1132 | |
1133 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True) | |
1134 def handle_link(self, args, _result): | |
1135 self._handle_file(args[0], Results.File.READ) | |
1136 self._handle_file(args[1], Results.File.WRITE) | |
1137 | |
1138 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True) | |
1139 def handle_lstat(self, args, _result): | |
1140 self._handle_file(args[0], Results.File.TOUCHED) | |
1141 | |
1142 def handle_mkdir(self, _args, _result): | |
1143 # We track content, not directories. | |
1144 pass | |
1145 | |
1146 @parse_args(r'^\"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False) | |
1147 def handle_open(self, args, _result): | |
1148 if 'O_DIRECTORY' in args[1]: | |
1149 return | |
1150 self._handle_file( | |
1151 args[0], | |
1152 Results.File.READ if 'O_RDONLY' in args[1] else Results.File.WRITE) | |
1153 | |
1154 @parse_args(r'^(\d+|AT_FDCWD), \"(.*?)\", ([A-Z\_\|]+)(|, \d+)$', False) | |
1155 def handle_openat(self, args, _result): | |
1156 if 'O_DIRECTORY' in args[2]: | |
1157 return | |
1158 if args[0] == 'AT_FDCWD': | |
1159 self._handle_file( | |
1160 args[1], | |
1161 Results.File.READ if 'O_RDONLY' in args[2] | |
1162 else Results.File.WRITE) | |
1163 else: | |
1164 # TODO(maruel): Implement relative open if necessary instead of the | |
1165 # AT_FDCWD flag, let's hope not since this means tracking all active | |
1166 # directory handles. | |
1167 raise NotImplementedError('Relative open via openat not implemented.') | |
1168 | |
1169 @parse_args(r'^\"(.+?)\", \".+?\"(\.\.\.)?, \d+$', False) | |
1170 def handle_readlink(self, args, _result): | |
1171 self._handle_file(args[0], Results.File.READ) | |
1172 | |
1173 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True) | |
1174 def handle_rename(self, args, _result): | |
1175 self._handle_file(args[0], Results.File.READ) | |
1176 self._handle_file(args[1], Results.File.WRITE) | |
1177 | |
1178 def handle_rmdir(self, _args, _result): | |
1179 # We track content, not directories. | |
1180 pass | |
1181 | |
1182 def handle_setxattr(self, _args, _result): | |
1183 # TODO(maruel): self._handle_file(args[0], Results.File.WRITE) | |
1184 pass | |
1185 | |
1186 @parse_args(r'\"(.+?)\", \{.+?, \.\.\.\}', True) | |
1187 def handle_stat(self, args, _result): | |
1188 self._handle_file(args[0], Results.File.TOUCHED) | |
1189 | |
1190 @parse_args(r'^\"(.+?)\", \"(.+?)\"$', True) | |
1191 def handle_symlink(self, args, _result): | |
1192 self._handle_file(args[0], Results.File.TOUCHED) | |
1193 self._handle_file(args[1], Results.File.WRITE) | |
1194 | |
1195 @parse_args(r'^\"(.+?)\", \d+', True) | |
1196 def handle_truncate(self, args, _result): | |
1197 self._handle_file(args[0], Results.File.WRITE) | |
1198 | |
1199 def handle_unlink(self, _args, _result): | |
1200 # In theory, the file had to be created anyway. | |
1201 pass | |
1202 | |
1203 def handle_unlinkat(self, _args, _result): | |
1204 # In theory, the file had to be created anyway. | |
1205 pass | |
1206 | |
1207 def handle_statfs(self, _args, _result): | |
1208 pass | |
1209 | |
1210 def handle_utimensat(self, _args, _result): | |
1211 pass | |
1212 | |
1213 def handle_vfork(self, _args, result): | |
1214 self._handling_forking('vfork', result) | |
1215 | |
1216 @staticmethod | |
1217 def _handle_unknown(function, args, result): | |
1218 raise TracingFailure( | |
1219 'Unexpected/unimplemented trace %s(%s)= %s' % | |
1220 (function, args, result), | |
1221 None, None, None) | |
1222 | |
1223 def _handling_forking(self, name, result): | |
1224 """Transfers cwd.""" | |
1225 if result.startswith(('?', '-1')): | |
1226 # The call failed. | |
1227 return | |
1228 # Update the other process right away. | |
1229 childpid = int(result) | |
1230 child = self._root().get_or_set_proc(childpid) | |
1231 if child.parentid is not None or childpid in self.children: | |
1232 raise TracingFailure( | |
1233 'Found internal inconsitency in process lifetime detection ' | |
1234 'during a %s() call' % name, | |
1235 None, None, None) | |
1236 | |
1237 # Copy the cwd object. | |
1238 child.initial_cwd = self.get_cwd() | |
1239 child.parentid = self.pid | |
1240 # It is necessary because the logs are processed out of order. | |
1241 self.children.append(child) | |
1242 | |
1243 def _handle_file(self, filepath, mode): | |
1244 filepath = self._mangle(filepath) | |
1245 self.add_file(filepath, mode) | |
1246 | |
1247 def _mangle(self, filepath): | |
1248 """Decodes a filepath found in the log and convert it to a late-bound | |
1249 path if necessary. | |
1250 | |
1251 |filepath| is an strace 'encoded' string and the returned value is | |
1252 either an unicode string if the path was absolute or a late bound path | |
1253 otherwise. | |
1254 """ | |
1255 filepath = Strace.load_filename(filepath) | |
1256 if os.path.isabs(filepath): | |
1257 return filepath | |
1258 else: | |
1259 if isinstance(self.get_cwd(), unicode): | |
1260 return os.path.normpath(os.path.join(self.get_cwd(), filepath)) | |
1261 return self.RelativePath(self.get_cwd(), filepath) | |
1262 | |
1263 def __init__(self, blacklist, root_pid, initial_cwd): | |
1264 """|root_pid| may be None when the root process is not known. | |
1265 | |
1266 In that case, a search is done after reading all the logs to figure out | |
1267 the root process. | |
1268 """ | |
1269 super(Strace.Context, self).__init__(blacklist) | |
1270 assert_is_renderable(initial_cwd) | |
1271 self.root_pid = root_pid | |
1272 self.initial_cwd = initial_cwd | |
1273 | |
1274 def render(self): | |
1275 """Returns the string value of the initial cwd of the root process. | |
1276 | |
1277 Used by RelativePath. | |
1278 """ | |
1279 return self.initial_cwd | |
1280 | |
1281 def on_line(self, pid, line): | |
1282 """Transfers control into the Process.on_line() function.""" | |
1283 self.get_or_set_proc(pid).on_line(line.strip()) | |
1284 | |
1285 def to_results(self): | |
1286 """If necessary, finds back the root process and verify consistency.""" | |
1287 if not self.root_pid: | |
1288 # The non-sudo case. The traced process was started by strace itself, | |
1289 # so the pid of the traced process is not known. | |
1290 root = [p for p in self._process_lookup.itervalues() if not p.parentid] | |
1291 if len(root) == 1: | |
1292 self.root_process = root[0] | |
1293 # Save it for later. | |
1294 self.root_pid = self.root_process.pid | |
1295 else: | |
1296 # The sudo case. The traced process was started manually so its pid is | |
1297 # known. | |
1298 self.root_process = self._process_lookup.get(self.root_pid) | |
1299 if not self.root_process: | |
1300 raise TracingFailure( | |
1301 'Found internal inconsitency in process lifetime detection ' | |
1302 'while finding the root process', | |
1303 None, | |
1304 None, | |
1305 None, | |
1306 self.root_pid, | |
1307 sorted(self._process_lookup)) | |
1308 process = self.root_process.to_results_process() | |
1309 if sorted(self._process_lookup) != sorted(p.pid for p in process.all): | |
1310 raise TracingFailure( | |
1311 'Found internal inconsitency in process lifetime detection ' | |
1312 'while looking for len(tree) == len(list)', | |
1313 None, | |
1314 None, | |
1315 None, | |
1316 sorted(self._process_lookup), | |
1317 sorted(p.pid for p in process.all)) | |
1318 return Results(process) | |
1319 | |
1320 def get_or_set_proc(self, pid): | |
1321 """Returns the Context.Process instance for this pid or creates a new one. | |
1322 """ | |
1323 if not pid or not isinstance(pid, int): | |
1324 raise TracingFailure( | |
1325 'Unpexpected value for pid: %r' % pid, | |
1326 pid, | |
1327 None, | |
1328 None, | |
1329 pid) | |
1330 if pid not in self._process_lookup: | |
1331 self._process_lookup[pid] = self.Process(self, pid) | |
1332 return self._process_lookup[pid] | |
1333 | |
1334 @classmethod | |
1335 def traces(cls): | |
1336 """Returns the list of all handled traces to pass this as an argument to | |
1337 strace. | |
1338 """ | |
1339 prefix = 'handle_' | |
1340 return [i[len(prefix):] for i in dir(cls.Process) if i.startswith(prefix)] | |
1341 | |
  class Tracer(ApiBase.Tracer):
    """Starts strace for each command to trace and records the log files."""
    # Maximum length of string arguments printed by strace; passed to -s.
    MAX_LEN = 256

    def __init__(self, logname, use_sudo):
      super(Strace.Tracer, self).__init__(logname)
      # When True, a thunk script is used so strace can be run via sudo.
      self.use_sudo = use_sudo
      if use_sudo:
        # TODO(maruel): Use the jump script systematically to make it easy to
        # figure out the root process, so RelativePath is not necessary anymore.
        self._child_script = create_exec_thunk()
        self._scripts_to_cleanup.append(self._child_script)

    def trace(self, cmd, cwd, tracename, output):
      """Runs strace on an executable.

      When use_sudo=True, it is a 3-phases process: start the thunk, start
      sudo strace with the pid of the thunk and then have the thunk os.execve()
      the process to trace.
      """
      logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
      assert os.path.isabs(cmd[0]), cmd[0]
      assert os.path.isabs(cwd), cwd
      assert os.path.normpath(cwd) == cwd, cwd
      with self._lock:
        if not self._initialized:
          # NOTE(review): 'unitialized' typo in the message; left as-is since
          # it is a runtime string.
          raise TracingFailure(
              'Called Tracer.trace() on an unitialized object',
              None, None, None, tracename)
        assert tracename not in (i['trace'] for i in self._traces)
      # When |output| is True, the traced process' output is captured through
      # pipes; otherwise it is left on the parent's stdout/stderr.
      stdout = stderr = None
      if output:
        stdout = subprocess.PIPE
        stderr = subprocess.STDOUT

      # Ensure all file related APIs are hooked.
      traces = ','.join(Strace.Context.traces() + ['file'])
      flags = [
        # Each child process has its own trace file. It is necessary because
        # strace may generate corrupted log file if multiple processes are
        # heavily doing syscalls simultaneously.
        '-ff',
        # Reduce whitespace usage.
        '-a1',
        # hex encode non-ascii strings.
        # TODO(maruel): '-x',
        # TODO(maruel): '-ttt',
        # Signals are unnecessary noise here. Note the parser can cope with them
        # but reduce the unnecessary output.
        '-esignal=none',
        # Print as much data as wanted.
        '-s', '%d' % self.MAX_LEN,
        '-e', 'trace=%s' % traces,
        '-o', self._logname + '.' + tracename,
      ]

      if self.use_sudo:
        # The pipe is used to hold the thunk paused until strace attached to
        # it.
        pipe_r, pipe_w = os.pipe()
        # Start the child process paused.
        target_cmd = [sys.executable, self._child_script, str(pipe_r)] + cmd
        logging.debug(' '.join(target_cmd))
        child_proc = subprocess.Popen(
            target_cmd,
            stdin=subprocess.PIPE,
            stdout=stdout,
            stderr=stderr,
            cwd=cwd)

        # TODO(maruel): both processes must use the same UID for it to work
        # without sudo. Look why -p is failing at the moment without sudo.
        trace_cmd = [
          'sudo',
          'strace',
          '-p', str(child_proc.pid),
        ] + flags
        logging.debug(' '.join(trace_cmd))
        strace_proc = subprocess.Popen(
            trace_cmd,
            cwd=cwd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)

        # Wait for strace to confirm it attached to the paused thunk.
        line = strace_proc.stderr.readline()
        if not re.match(r'^Process \d+ attached \- interrupt to quit$', line):
          # TODO(maruel): Raise an exception.
          assert False, line

        # Now fire the child process.
        os.write(pipe_w, 'x')

        out = child_proc.communicate()[0]
        strace_out = strace_proc.communicate()[0]

        # TODO(maruel): if strace_proc.returncode: Add an exception.
        saved_out = strace_out if strace_proc.returncode else out
        root_pid = child_proc.pid
      else:
        # Non-sudo case.
        trace_cmd = [
          'strace',
        ] + flags + cmd
        logging.debug(' '.join(trace_cmd))
        child_proc = subprocess.Popen(
            trace_cmd,
            cwd=cwd,
            stdin=subprocess.PIPE,
            stdout=stdout,
            stderr=stderr)
        out = child_proc.communicate()[0]
        # TODO(maruel): Walk the logs and figure out the root process would
        # simplify parsing the logs a *lot*.
        saved_out = out
        # The trace reader will have to figure out.
        root_pid = None

      with self._lock:
        assert tracename not in (i['trace'] for i in self._traces)
        self._traces.append(
          {
            'cmd': cmd,
            'cwd': cwd,
            'output': saved_out,
            'pid': root_pid,
            'trace': tracename,
          })
      return child_proc.returncode, out
1468 | |
1469 def __init__(self, use_sudo=None): | |
1470 super(Strace, self).__init__() | |
1471 self.use_sudo = use_sudo | |
1472 | |
1473 def get_tracer(self, logname): | |
1474 return self.Tracer(logname, self.use_sudo) | |
1475 | |
1476 @staticmethod | |
1477 def clean_trace(logname): | |
1478 if os.path.isfile(logname): | |
1479 os.remove(logname) | |
1480 # Also delete any pid specific file from previous traces. | |
1481 for i in glob.iglob(logname + '.*'): | |
1482 if i.rsplit('.', 1)[1].isdigit(): | |
1483 os.remove(i) | |
1484 | |
  @classmethod
  def parse_log(cls, logname, blacklist, trace_name):
    """Parses the strace logs generated by Tracer.trace().

    Arguments:
    - logname: absolute path to the main json log file.
    - blacklist: forwarded to cls.Context to filter out files.
    - trace_name: when set, only the trace with that name is parsed.

    Returns a list of dicts, one per trace, each with 'output', 'trace' and
    either 'results' (a Results instance) or 'exception' (sys.exc_info()).
    """
    logging.info('parse_log(%s, ..., %s)', logname, trace_name)
    assert os.path.isabs(logname)
    data = read_json(logname)
    out = []
    for item in data['traces']:
      if trace_name and item['trace'] != trace_name:
        continue
      result = {
        'output': item['output'],
        'trace': item['trace'],
      }
      try:
        context = cls.Context(blacklist, item['pid'], item['cwd'])
        # strace -ff writes one file per pid, named <logname>.<trace>.<pid>.
        for pidfile in glob.iglob('%s.%s.*' % (logname, item['trace'])):
          logging.debug('Reading %s', pidfile)
          pid = pidfile.rsplit('.', 1)[1]
          if pid.isdigit():
            pid = int(pid)
            found_line = False
            for line in open(pidfile, 'rb'):
              context.on_line(pid, line)
              found_line = True
            if not found_line:
              # Ensures that a completely empty trace still creates the
              # corresponding Process instance by logging a dummy line.
              context.on_line(pid, '')
          else:
            logging.warning('Found unexpected file %s', pidfile)
        result['results'] = context.to_results()
      except TracingFailure:
        # Keep the exception information so the caller can report it instead
        # of aborting the whole parse.
        result['exception'] = sys.exc_info()
      out.append(result)
    return out
1520 | |
1521 | |
1522 class Dtrace(ApiBase): | |
1523 """Uses DTrace framework through dtrace. Requires root access. | |
1524 | |
1525 Implies Mac OSX. | |
1526 | |
1527 dtruss can't be used because it has compatibility issues with python. | |
1528 | |
1529 Also, the pid->cwd handling needs to be done manually since OSX has no way to | |
1530 get the absolute path of the 'cwd' dtrace variable from the probe. | |
1531 | |
1532 Also, OSX doesn't populate curpsinfo->pr_psargs properly, see | |
1533 https://discussions.apple.com/thread/1980539. So resort to handling execve() | |
1534 manually. | |
1535 | |
1536 errno is not printed in the log since this implementation currently only cares | |
1537 about files that were successfully opened. | |
1538 """ | |
1539 class Context(ApiBase.Context): | |
1540 # Format: index pid function(args) | |
1541 RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$') | |
1542 | |
1543 # Arguments parsing. | |
1544 RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') | |
1545 RE_CHDIR = re.compile(r'^\"(.+?)\"$') | |
1546 RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') | |
1547 RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$') | |
1548 RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$') | |
1549 RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') | |
1550 | |
1551 # O_DIRECTORY is not defined on Windows and dtrace doesn't exist on Windows. | |
1552 O_DIRECTORY = os.O_DIRECTORY if hasattr(os, 'O_DIRECTORY') else None | |
1553 O_RDWR = os.O_RDWR | |
1554 O_WRONLY = os.O_WRONLY | |
1555 | |
1556 class Process(ApiBase.Context.Process): | |
1557 def __init__(self, *args): | |
1558 super(Dtrace.Context.Process, self).__init__(*args) | |
1559 self.cwd = self.initial_cwd | |
1560 | |
    def __init__(self, blacklist, thunk_pid, initial_cwd):
      """Sets up the parsing context for one dtrace log.

      Arguments:
      - blacklist: forwarded to the base class.
      - thunk_pid: pid of the thunk script; its direct child becomes the root
        process (see handle_proc_start()).
      - initial_cwd: unicode path the thunk was started in.
      """
      logging.info(
          '%s(%d, %s)' % (self.__class__.__name__, thunk_pid, initial_cwd))
      super(Dtrace.Context, self).__init__(blacklist)
      assert isinstance(initial_cwd, unicode), initial_cwd
      # Process ID of the temporary script created by create_subprocess_thunk().
      self._thunk_pid = thunk_pid
      self._initial_cwd = initial_cwd
      # Number of log lines processed so far; used in error reporting.
      self._line_number = 0
1570 | |
    def on_line(self, line):
      """Parses one line of the dtrace log and dispatches it to the
      handle_<probe>() method matching the probe name, or _handle_ignored()
      when there is none.
      """
      assert isinstance(line, unicode), line
      self._line_number += 1
      match = self.RE_HEADER.match(line)
      if not match:
        raise TracingFailure(
            'Found malformed line: %s' % line,
            None,
            self._line_number,
            line)
      # dtrace probe names may contain '-', which is not valid in a python
      # identifier.
      fn = getattr(
          self,
          'handle_%s' % match.group(2).replace('-', '_'),
          self._handle_ignored)
      # It is guaranteed to succeed because of the regexp. Or at least I thought
      # it would.
      pid = int(match.group(1))
      try:
        return fn(pid, match.group(3))
      except TracingFailure, e:
        # Hack in the values since the handler could be a static function.
        e.pid = pid
        e.line = line
        e.line_number = self._line_number
        # Re-raise the modified exception.
        raise
      except (KeyError, NotImplementedError, ValueError), e:
        raise TracingFailure(
            'Trace generated a %s exception: %s' % (
                e.__class__.__name__, str(e)),
            pid,
            self._line_number,
            line,
            e)
1605 | |
1606 def to_results(self): | |
1607 process = self.root_process.to_results_process() | |
1608 # Internal concistency check. | |
1609 if sorted(self._process_lookup) != sorted(p.pid for p in process.all): | |
1610 raise TracingFailure( | |
1611 'Found internal inconsitency in process lifetime detection ' | |
1612 'while looking for len(tree) == len(list)', | |
1613 None, | |
1614 None, | |
1615 None, | |
1616 sorted(self._process_lookup), | |
1617 sorted(p.pid for p in process.all)) | |
1618 return Results(process) | |
1619 | |
1620 def handle_dtrace_BEGIN(self, _pid, args): | |
1621 if not self.RE_DTRACE_BEGIN.match(args): | |
1622 raise TracingFailure( | |
1623 'Found internal inconsitency in dtrace_BEGIN log line', | |
1624 None, None, None) | |
1625 | |
1626 def handle_proc_start(self, pid, args): | |
1627 """Transfers cwd. | |
1628 | |
1629 The dtrace script already takes care of only tracing the processes that | |
1630 are child of the traced processes so there is no need to verify the | |
1631 process hierarchy. | |
1632 """ | |
1633 if pid in self._process_lookup: | |
1634 raise TracingFailure( | |
1635 'Found internal inconsitency in proc_start: %d started two times' % | |
1636 pid, | |
1637 None, None, None) | |
1638 match = self.RE_PROC_START.match(args) | |
1639 if not match: | |
1640 raise TracingFailure( | |
1641 'Failed to parse arguments: %s' % args, | |
1642 None, None, None) | |
1643 ppid = int(match.group(1)) | |
1644 if ppid == self._thunk_pid and not self.root_process: | |
1645 proc = self.root_process = self.Process( | |
1646 self.blacklist, pid, self._initial_cwd) | |
1647 elif ppid in self._process_lookup: | |
1648 proc = self.Process(self.blacklist, pid, self._process_lookup[ppid].cwd) | |
1649 self._process_lookup[ppid].children.append(proc) | |
1650 else: | |
1651 # Another process tree, ignore. | |
1652 return | |
1653 self._process_lookup[pid] = proc | |
1654 logging.debug( | |
1655 'New child: %s -> %d cwd:%s' % | |
1656 (ppid, pid, render(proc.initial_cwd))) | |
1657 | |
1658 def handle_proc_exit(self, pid, _args): | |
1659 """Removes cwd.""" | |
1660 if pid in self._process_lookup: | |
1661 # self._thunk_pid is not traced itself and other traces run neither. | |
1662 self._process_lookup[pid].cwd = None | |
1663 | |
1664 def handle_execve(self, pid, args): | |
1665 """Sets the process' executable. | |
1666 | |
1667 TODO(maruel): Read command line arguments. See | |
1668 https://discussions.apple.com/thread/1980539 for an example. | |
1669 https://gist.github.com/1242279 | |
1670 | |
1671 Will have to put the answer at http://stackoverflow.com/questions/7556249. | |
1672 :) | |
1673 """ | |
1674 if not pid in self._process_lookup: | |
1675 # Another process tree, ignore. | |
1676 return | |
1677 match = self.RE_EXECVE.match(args) | |
1678 if not match: | |
1679 raise TracingFailure( | |
1680 'Failed to parse arguments: %r' % args, | |
1681 None, None, None) | |
1682 proc = self._process_lookup[pid] | |
1683 proc.executable = match.group(1) | |
1684 self._handle_file(pid, proc.executable, Results.File.READ) | |
1685 proc.command = self.process_escaped_arguments(match.group(3)) | |
1686 if int(match.group(2)) != len(proc.command): | |
1687 raise TracingFailure( | |
1688 'Failed to parse execve() arguments: %s' % args, | |
1689 None, None, None) | |
1690 | |
1691 def handle_chdir(self, pid, args): | |
1692 """Updates cwd.""" | |
1693 if pid not in self._process_lookup: | |
1694 # Another process tree, ignore. | |
1695 return | |
1696 cwd = self.RE_CHDIR.match(args).group(1) | |
1697 if not cwd.startswith('/'): | |
1698 cwd2 = os.path.join(self._process_lookup[pid].cwd, cwd) | |
1699 logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) | |
1700 else: | |
1701 logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) | |
1702 cwd2 = cwd | |
1703 self._process_lookup[pid].cwd = cwd2 | |
1704 | |
1705 def handle_open_nocancel(self, pid, args): | |
1706 """Redirects to handle_open().""" | |
1707 return self.handle_open(pid, args) | |
1708 | |
def handle_open(self, pid, args):
  """Registers a file opened by a traced process.

  Explicit directory opens (O_DIRECTORY) are skipped; write access is
  detected by O_RDWR or O_WRONLY being present in the open flags.
  """
  if pid not in self._process_lookup:
    # Event from an unrelated process tree; nothing to track.
    return
  match = self.RE_OPEN.match(args)
  if not match:
    raise TracingFailure(
        'Failed to parse arguments: %s' % args,
        None, None, None)
  flag = int(match.group(2), 16)
  if self.O_DIRECTORY & flag == self.O_DIRECTORY:
    # Explicitly opening a directory; not a file access worth recording.
    return
  if (self.O_RDWR | self.O_WRONLY) & flag:
    mode = Results.File.WRITE
  else:
    mode = Results.File.READ
  self._handle_file(pid, match.group(1), mode)
1727 | |
def handle_rename(self, pid, args):
  """Records a rename as a read of the source and a write of the target."""
  if pid not in self._process_lookup:
    # Event from an unrelated process tree; nothing to track.
    return
  match = self.RE_RENAME.match(args)
  if not match:
    raise TracingFailure(
        'Failed to parse arguments: %s' % args,
        None, None, None)
  old_path, new_path = match.group(1), match.group(2)
  self._handle_file(pid, old_path, Results.File.READ)
  self._handle_file(pid, new_path, Results.File.WRITE)
1739 | |
1740 def _handle_file(self, pid, filepath, mode): | |
1741 if not filepath.startswith('/'): | |
1742 filepath = os.path.join(self._process_lookup[pid].cwd, filepath) | |
1743 # We can get '..' in the path. | |
1744 filepath = os.path.normpath(filepath) | |
1745 # Sadly, still need to filter out directories here; | |
1746 # saw open_nocancel(".", 0, 0) = 0 lines. | |
1747 if os.path.isdir(filepath): | |
1748 return | |
1749 self._process_lookup[pid].add_file(filepath, mode) | |
1750 | |
def handle_ftruncate(self, pid, args):
  """ftruncate() is only the sentinel used to stop dtrace; nothing to do."""
  pass
1754 | |
1755 @staticmethod | |
1756 def _handle_ignored(pid, args): | |
1757 """Is called for all the event traces that are not handled.""" | |
1758 raise NotImplementedError('Please implement me') | |
1759 | |
1760 @staticmethod | |
1761 def process_escaped_arguments(text): | |
1762 """Extracts escaped arguments on a string and return the arguments as a | |
1763 list. | |
1764 | |
1765 Implemented as an automaton. | |
1766 | |
1767 Example: | |
1768 With text = '\\001python2.7\\001-c\\001print(\\"hi\\")\\0', the | |
1769 function will return ['python2.7', '-c', 'print("hi")] | |
1770 """ | |
1771 if not text.endswith('\\0'): | |
1772 raise ValueError('String is not null terminated: %r' % text, text) | |
1773 text = text[:-2] | |
1774 | |
1775 def unescape(x): | |
1776 """Replaces '\\' with '\' and '\?' (where ? is anything) with ?.""" | |
1777 out = [] | |
1778 escaped = False | |
1779 for i in x: | |
1780 if i == '\\' and not escaped: | |
1781 escaped = True | |
1782 continue | |
1783 escaped = False | |
1784 out.append(i) | |
1785 return ''.join(out) | |
1786 | |
1787 return [unescape(i) for i in text.split('\\001')] | |
1788 | |
class Tracer(ApiBase.Tracer):
  """Traces processes on OSX by driving a dtrace(1M) child process.

  The class-level string constants below hold the D language program that
  is handed to dtrace; _get_dtrace_code() injects the per-run values (PID,
  SCRIPT, FILE_ID) in front of D_CODE.
  """
  # pylint: disable=C0301
  #
  # To understand the following code, you'll want to take a look at:
  # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html
  # https://wikis.oracle.com/display/DTrace/Variables
  # http://docs.oracle.com/cd/E19205-01/820-4221/
  #
  # 0. Dump all the valid probes into a text file. It is important, you
  #    want to redirect into a file and you don't want to constantly 'sudo'.
  # $ sudo dtrace -l > probes.txt
  #
  # 1. Count the number of probes:
  # $ wc -l probes.txt
  # 81823  # On OSX 10.7, including 1 header line.
  #
  # 2. List providers, intentionally skipping all the 'syspolicy10925' and the
  #    likes and skipping the header with NR>1:
  # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]'
  # dtrace
  # fbt
  # io
  # ip
  # lockstat
  # mach_trap
  # proc
  # profile
  # sched
  # syscall
  # tcp
  # vminfo
  #
  # 3. List of valid probes:
  # $ grep syscall probes.txt | less
  #    or use dtrace directly:
  # $ sudo dtrace -l -P syscall | less
  #
  # trackedpid is an associative array where its value can be 0, 1 or 2.
  # 0 is for untracked processes and is the default value for items not
  #   in the associative array.
  # 1 is for tracked processes.
  # 2 is for the script created by create_subprocess_thunk() only. It is not
  #   tracked itself but all its decendants are.
  #
  # The script will kill itself only once waiting_to_die == 1 and
  # current_processes == 0, so that both getlogin() was called and that
  # all traced processes exited.
  #
  # TODO(maruel): Use cacheable predicates. See
  # https://wikis.oracle.com/display/DTrace/Performance+Considerations
  D_CODE = """
    dtrace:::BEGIN {
      waiting_to_die = 0;
      current_processes = 0;
      logindex = 0;
      printf("%d %d %s_%s(\\"%s\\")\\n",
             logindex, PID, probeprov, probename, SCRIPT);
      logindex++;
    }

    proc:::start /trackedpid[ppid]/ {
      trackedpid[pid] = 1;
      current_processes += 1;
      printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
             logindex, pid, probeprov, probename,
             ppid,
             execname,
             current_processes);
      logindex++;
    }
    /* Should use SCRIPT but there is no access to this variable at that
     * point. */
    proc:::start /ppid == PID && execname == "Python"/ {
      trackedpid[pid] = 2;
      current_processes += 1;
      printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n",
             logindex, pid, probeprov, probename,
             ppid,
             execname,
             current_processes);
      logindex++;
    }
    proc:::exit /trackedpid[pid] &&
                 current_processes == 1 &&
                 waiting_to_die == 1/ {
      trackedpid[pid] = 0;
      current_processes -= 1;
      printf("%d %d %s_%s(%d)\\n",
             logindex, pid, probeprov, probename,
             current_processes);
      logindex++;
      exit(0);
    }
    proc:::exit /trackedpid[pid]/ {
      trackedpid[pid] = 0;
      current_processes -= 1;
      printf("%d %d %s_%s(%d)\\n",
             logindex, pid, probeprov, probename,
             current_processes);
      logindex++;
    }

    /* Use an arcane function to detect when we need to die */
    syscall::ftruncate:entry /pid == PID && arg0 == FILE_ID/ {
      waiting_to_die = 1;
      printf("%d %d %s()\\n", logindex, pid, probefunc);
      logindex++;
    }
    syscall::ftruncate:entry /
        pid == PID && arg0 == FILE_ID && current_processes == 0/ {
      exit(0);
    }

    syscall::open*:entry /trackedpid[pid] == 1/ {
      self->open_arg0 = arg0;
      self->open_arg1 = arg1;
      self->open_arg2 = arg2;
    }
    syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ {
      this->open_arg0 = copyinstr(self->open_arg0);
      printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n",
             logindex, pid, probefunc,
             this->open_arg0,
             self->open_arg1,
             self->open_arg2);
      logindex++;
      this->open_arg0 = 0;
    }
    syscall::open*:return /trackedpid[pid] == 1/ {
      self->open_arg0 = 0;
      self->open_arg1 = 0;
      self->open_arg2 = 0;
    }

    syscall::rename:entry /trackedpid[pid] == 1/ {
      self->rename_arg0 = arg0;
      self->rename_arg1 = arg1;
    }
    syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ {
      this->rename_arg0 = copyinstr(self->rename_arg0);
      this->rename_arg1 = copyinstr(self->rename_arg1);
      printf("%d %d %s(\\"%s\\", \\"%s\\")\\n",
             logindex, pid, probefunc,
             this->rename_arg0,
             this->rename_arg1);
      logindex++;
      this->rename_arg0 = 0;
      this->rename_arg1 = 0;
    }
    syscall::rename:return /trackedpid[pid] == 1/ {
      self->rename_arg0 = 0;
      self->rename_arg1 = 0;
    }

    /* Track chdir, it's painful because it is only receiving relative path.
     */
    syscall::chdir:entry /trackedpid[pid] == 1/ {
      self->chdir_arg0 = arg0;
    }
    syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ {
      this->chdir_arg0 = copyinstr(self->chdir_arg0);
      printf("%d %d %s(\\"%s\\")\\n",
             logindex, pid, probefunc,
             this->chdir_arg0);
      logindex++;
      this->chdir_arg0 = 0;
    }
    syscall::chdir:return /trackedpid[pid] == 1/ {
      self->chdir_arg0 = 0;
    }
    """

  # execve-specific code, tends to throw a lot of exceptions.
  D_CODE_EXECVE = """
    /* Finally what we care about! */
    syscall::exec*:entry /trackedpid[pid]/ {
      self->exec_arg0 = copyinstr(arg0);
      /* Incrementally probe for a NULL in the argv parameter of execve() to
       * figure out argc. */
      /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was
       * found. */
      self->exec_argc = 0;
      /* Probe for argc==1 */
      this->exec_argv = (user_addr_t*)copyin(
          arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
      self->exec_argc = this->exec_argv[self->exec_argc] ?
          (self->exec_argc + 1) : self->exec_argc;

      /* Probe for argc==2 */
      this->exec_argv = (user_addr_t*)copyin(
          arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
      self->exec_argc = this->exec_argv[self->exec_argc] ?
          (self->exec_argc + 1) : self->exec_argc;

      /* Probe for argc==3 */
      this->exec_argv = (user_addr_t*)copyin(
          arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
      self->exec_argc = this->exec_argv[self->exec_argc] ?
          (self->exec_argc + 1) : self->exec_argc;

      /* Probe for argc==4 */
      this->exec_argv = (user_addr_t*)copyin(
          arg1, sizeof(user_addr_t) * (self->exec_argc + 1));
      self->exec_argc = this->exec_argv[self->exec_argc] ?
          (self->exec_argc + 1) : self->exec_argc;

      /* Copy the inputs strings since there is no guarantee they'll be
       * present after the call completed. */
      self->exec_argv0 = (self->exec_argc > 0) ?
          copyinstr(this->exec_argv[0]) : "";
      self->exec_argv1 = (self->exec_argc > 1) ?
          copyinstr(this->exec_argv[1]) : "";
      self->exec_argv2 = (self->exec_argc > 2) ?
          copyinstr(this->exec_argv[2]) : "";
      self->exec_argv3 = (self->exec_argc > 3) ?
          copyinstr(this->exec_argv[3]) : "";
      this->exec_argv = 0;
    }
    syscall::exec*:return /trackedpid[pid] && errno == 0/ {
      /* We need to join strings here, as using multiple printf() would
       * cause tearing when multiple threads/processes are traced.
       * Since it is impossible to escape a string and join it to another one,
       * like sprintf("%s%S", previous, more), use hackery.
       * Each of the elements are split with a \\1. \\0 cannot be used because
       * it is simply ignored. This will conflict with any program putting a
       * \\1 in their execve() string but this should be "rare enough" */
      this->args = "";
      /* Process exec_argv[0] */
      this->args = strjoin(
          this->args, (self->exec_argc > 0) ? self->exec_argv0 : "");

      /* Process exec_argv[1] */
      this->args = strjoin(
          this->args, (self->exec_argc > 1) ? "\\1" : "");
      this->args = strjoin(
          this->args, (self->exec_argc > 1) ? self->exec_argv1 : "");

      /* Process exec_argv[2] */
      this->args = strjoin(
          this->args, (self->exec_argc > 2) ? "\\1" : "");
      this->args = strjoin(
          this->args, (self->exec_argc > 2) ? self->exec_argv2 : "");

      /* Process exec_argv[3] */
      this->args = strjoin(
          this->args, (self->exec_argc > 3) ? "\\1" : "");
      this->args = strjoin(
          this->args, (self->exec_argc > 3) ? self->exec_argv3 : "");

      /* Prints self->exec_argc to permits verifying the internal
       * consistency since this code is quite fishy. */
      printf("%d %d %s(\\"%s\\", [%d, %S])\\n",
             logindex, pid, probefunc,
             self->exec_arg0,
             self->exec_argc,
             this->args);
      logindex++;
      this->args = 0;
    }
    syscall::exec*:return /trackedpid[pid]/ {
      self->exec_arg0 = 0;
      self->exec_argc = 0;
      self->exec_argv0 = 0;
      self->exec_argv1 = 0;
      self->exec_argv2 = 0;
      self->exec_argv3 = 0;
    }
    """

  # Code currently not used.
  D_EXTRANEOUS = """
    /* This is a good learning experience, since it traces a lot of things
     * related to the process and child processes.
     * Warning: it generates a gigantic log. For example, tracing
     * "data/trace_inputs/child1.py --child" generates a 2mb log and takes
     * several minutes to execute.
     */
    /*
    mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
      printf("%d %d %s_%s() = %d\\n",
             logindex, pid, probeprov, probefunc, errno);
      logindex++;
    }
    proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
      printf("%d %d %s_%s() = %d\\n",
             logindex, pid, probeprov, probefunc, errno);
      logindex++;
    }
    sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
      printf("%d %d %s_%s() = %d\\n",
             logindex, pid, probeprov, probefunc, errno);
      logindex++;
    }
    syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
      printf("%d %d %s_%s() = %d\\n",
             logindex, pid, probeprov, probefunc, errno);
      logindex++;
    }
    vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ {
      printf("%d %d %s_%s() = %d\\n",
             logindex, pid, probeprov, probefunc, errno);
      logindex++;
    }
    */
    /* TODO(maruel): *stat* functions and friends
      syscall::access:return,
      syscall::chdir:return,
      syscall::chflags:return,
      syscall::chown:return,
      syscall::chroot:return,
      syscall::getattrlist:return,
      syscall::getxattr:return,
      syscall::lchown:return,
      syscall::lstat64:return,
      syscall::lstat:return,
      syscall::mkdir:return,
      syscall::pathconf:return,
      syscall::readlink:return,
      syscall::removexattr:return,
      syscall::setxattr:return,
      syscall::stat64:return,
      syscall::stat:return,
      syscall::truncate:return,
      syscall::unlink:return,
      syscall::utimes:return,
    */
    """
2116 | |
def __init__(self, logname, use_sudo):
  """Starts the log collection with dtrace.

  Requires root access or chmod 4555 on dtrace. dtrace is asynchronous so
  this needs to wait for dtrace to be "warmed up".

  Arguments:
  - logname: base path of the log; dtrace output goes to logname + '.log'.
  - use_sudo: tri-state. None auto-detects (sudo is skipped when the dtrace
    binary is setuid), True forces sudo, False disables it.
  """
  super(Dtrace.Tracer, self).__init__(logname)
  # This script is used as a signal to figure out the root process.
  self._signal_script = create_subprocess_thunk()
  self._scripts_to_cleanup.append(self._signal_script)
  # This unique dummy temp file is used to signal the dtrace script that it
  # should stop as soon as all the child processes are done. A bit hackish
  # but works fine enough.
  self._dummy_file_id, self._dummy_file_name = tempfile.mkstemp(
      prefix='trace_signal_file')

  dtrace_path = '/usr/sbin/dtrace'
  if not os.path.isfile(dtrace_path):
    # Fall back to $PATH lookup.
    dtrace_path = 'dtrace'
  elif use_sudo is None and (os.stat(dtrace_path).st_mode & stat.S_ISUID):
    # No need to sudo. For those following at home, don't do that.
    use_sudo = False

  # Note: do not use the -p flag. It's useless if the initial process quits
  # too fast, resulting in missing traces from the grand-children. The D
  # code manages the dtrace lifetime itself.
  trace_cmd = [
    dtrace_path,
    # Use a larger buffer if getting 'out of scratch space' errors.
    # Ref: https://wikis.oracle.com/display/DTrace/Options+and+Tunables
    '-b', '10m',
    '-x', 'dynvarsize=10m',
    #'-x', 'dtrace_global_maxsize=1m',
    '-x', 'evaltime=exec',
    '-o', '/dev/stderr',
    '-q',
    '-n', self._get_dtrace_code(),
  ]
  if use_sudo is not False:
    trace_cmd.insert(0, 'sudo')

  with open(self._logname + '.log', 'wb') as logfile:
    # Both stdout and stderr of dtrace are routed to the log file.
    self._dtrace = subprocess.Popen(
        trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
  logging.debug('Started dtrace pid: %d' % self._dtrace.pid)

  # Reads until one line is printed, which signifies dtrace is up and ready.
  with open(self._logname + '.log', 'rb') as logfile:
    while 'dtrace_BEGIN' not in logfile.readline():
      if self._dtrace.poll() is not None:
        # dtrace died before it was warmed up; bail out of the wait loop.
        # Do a busy wait. :/
        break
  logging.debug('dtrace started')
2170 | |
2171 def _get_dtrace_code(self): | |
2172 """Setups the D code to implement child process tracking. | |
2173 | |
2174 Injects the cookie in the script so it knows when to stop. | |
2175 | |
2176 The script will detect any instance of the script created with | |
2177 create_subprocess_thunk() and will start tracing it. | |
2178 """ | |
2179 out = ( | |
2180 'inline int PID = %d;\n' | |
2181 'inline string SCRIPT = "%s";\n' | |
2182 'inline int FILE_ID = %d;\n' | |
2183 '\n' | |
2184 '%s') % ( | |
2185 os.getpid(), | |
2186 self._signal_script, | |
2187 self._dummy_file_id, | |
2188 self.D_CODE) | |
2189 if os.environ.get('TRACE_INPUTS_DTRACE_ENABLE_EXECVE') == '1': | |
2190 # Do not enable by default since it tends to spew dtrace: error lines | |
2191 # because the execve() parameters are not in valid memory at the time of | |
2192 # logging. | |
2193 # TODO(maruel): Find a way to make this reliable since it's useful but | |
2194 # only works in limited/trivial uses cases for now. | |
2195 out += self.D_CODE_EXECVE | |
2196 return out | |
2197 | |
def trace(self, cmd, cwd, tracename, output):
  """Runs dtrace on an executable.

  This dtruss is broken when it starts the process itself or when tracing
  child processes, this code starts a wrapper process
  generated with create_subprocess_thunk() which starts the executable to
  trace.

  Arguments:
  - cmd: command to run; cmd[0] must be an absolute path.
  - cwd: absolute, normalized working directory for the child.
  - tracename: unique name for this trace in the resulting log.
  - output: when truthy, the child's combined stdout+stderr is captured
    and returned; otherwise the returned output is None.

  Returns:
    Tuple (child exit code, output).
  """
  logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
  assert os.path.isabs(cmd[0]), cmd[0]
  assert os.path.isabs(cwd), cwd
  assert os.path.normpath(cwd) == cwd, cwd
  with self._lock:
    if not self._initialized:
      raise TracingFailure(
          'Called Tracer.trace() on an unitialized object',
          None, None, None, tracename)
    # A trace name can only be used once per Tracer instance.
    assert tracename not in (i['trace'] for i in self._traces)

  # Starts the script wrapper to start the child process. This signals the
  # dtrace script that this process is to be traced.
  stdout = stderr = None
  if output:
    stdout = subprocess.PIPE
    stderr = subprocess.STDOUT
  child_cmd = [
    sys.executable,
    self._signal_script,
    tracename,
  ]
  # Call a dummy function so that dtrace knows I'm about to launch a process
  # that needs to be traced.
  # Yummy.
  child = subprocess.Popen(
      child_cmd + tools.fix_python_path(cmd),
      stdin=subprocess.PIPE,
      stdout=stdout,
      stderr=stderr,
      cwd=cwd)
  logging.debug('Started child pid: %d' % child.pid)

  out = child.communicate()[0]
  # This doesn't mean tracing is done, one of the grand-child process may
  # still be alive. It will be tracked with the dtrace script.

  with self._lock:
    # Re-check under the lock before recording the trace entry.
    assert tracename not in (i['trace'] for i in self._traces)
    self._traces.append(
        {
          'cmd': cmd,
          'cwd': cwd,
          'pid': child.pid,
          'output': out,
          'trace': tracename,
        })
  return child.returncode, out
2254 | |
def close(self, timeout=None):
  """Terminates dtrace.

  Signals the D script to stop via ftruncate() on the dummy file, then
  polls up to |timeout| seconds before killing the dtrace process. The
  dummy file is always closed and deleted, even on failure.
  """
  logging.debug('close(%s)' % timeout)
  try:
    try:
      super(Dtrace.Tracer, self).close(timeout)
      # Signal dtrace that it should stop now.
      # ftruncate doesn't exist on Windows.
      os.ftruncate(self._dummy_file_id, 0)  # pylint: disable=E1101
      if timeout:
        start = time.time()
        # Use polling. :/
        while (self._dtrace.poll() is None and
               (time.time() - start) < timeout):
          time.sleep(0.1)
        self._dtrace.kill()
        self._dtrace.wait()
    finally:
      # Make sure to kill it in any case.
      if self._dtrace.poll() is None:
        try:
          self._dtrace.kill()
          self._dtrace.wait()
        except OSError:
          # The process already exited between poll() and kill().
          pass

    if self._dtrace.returncode != 0:
      # Warn about any dtrace failure but basically ignore it.
      print 'dtrace failure: %s' % self._dtrace.returncode
  finally:
    os.close(self._dummy_file_id)
    os.remove(self._dummy_file_name)
2287 | |
def post_process_log(self):
  """Sorts the log back in order when each call occured.

  dtrace doesn't save the buffer in strict order since it keeps one buffer
  per CPU. Each valid log line starts with a numeric log index; the lines
  are sorted by that index and the file is rewritten in place.

  Raises:
    TracingFailure: if dtrace error lines are found in the log, or if a
    line doesn't start with a numeric log index.
  """
  super(Dtrace.Tracer, self).post_process_log()
  logname = self._logname + '.log'
  with open(logname, 'rb') as logfile:
    lines = [l for l in logfile if l.strip()]
  errors = [l for l in lines if l.startswith('dtrace:')]
  if errors:
    raise TracingFailure(
        'Found errors in the trace: %s' % '\n'.join(errors),
        None, None, None, logname)
  try:
    lines = sorted(lines, key=lambda l: int(l.split(' ', 1)[0]))
  except ValueError:
    # Report the offending lines, i.e. the ones whose first token is NOT a
    # number. The previous code listed the valid numeric lines instead,
    # which hid the actual culprits from the error message.
    raise TracingFailure(
        'Found errors in the trace: %s' % '\n'.join(
            l for l in lines if not l.split(' ', 1)[0].isdigit()),
        None, None, None, logname)
  with open(logname, 'wb') as logfile:
    logfile.write(''.join(lines))
2312 | |
def __init__(self, use_sudo=None):
  """|use_sudo| is a tri-state: None auto-detects at Tracer creation time,
  True forces sudo and False disables it.
  """
  super(Dtrace, self).__init__()
  # Forwarded verbatim to each Tracer created by get_tracer().
  self.use_sudo = use_sudo
2316 | |
def get_tracer(self, logname):
  """Creates a Tracer writing its log to |logname|."""
  tracer_factory = self.Tracer
  return tracer_factory(logname, self.use_sudo)
2319 | |
2320 @staticmethod | |
2321 def clean_trace(logname): | |
2322 for ext in ('', '.log'): | |
2323 if os.path.isfile(logname + ext): | |
2324 os.remove(logname + ext) | |
2325 | |
@classmethod
def parse_log(cls, logname, blacklist, trace_name):
  """Loads the dtrace log and converts each trace into a result dict.

  When |trace_name| is set, only that trace is processed. Parsing failures
  are stored per-trace under the 'exception' key instead of aborting.
  """
  logging.info('parse_log(%s, ..., %s)', logname, trace_name)
  assert os.path.isabs(logname)

  def blacklist_more(filepath):
    # All the HFS metadata is in the form /.vol/...
    return blacklist(filepath) or re.match(r'^\/\.vol\/.+$', filepath)

  data = read_json(logname)
  wanted = [
    item for item in data['traces']
    if not trace_name or item['trace'] == trace_name
  ]
  out = []
  for item in wanted:
    entry = {
      'output': item['output'],
      'trace': item['trace'],
    }
    try:
      context = cls.Context(blacklist_more, item['pid'], item['cwd'])
      # It's fine to assume the file as UTF-8: OSX enforces the file names
      # to be valid UTF-8 and we control the log output.
      for line in codecs.open(logname + '.log', 'rb', encoding='utf-8'):
        context.on_line(line)
      entry['results'] = context.to_results()
    except TracingFailure:
      entry['exception'] = sys.exc_info()
    out.append(entry)
  return out
2355 | |
2356 | |
2357 class LogmanTrace(ApiBase): | |
2358 """Uses the native Windows ETW based tracing functionality to trace a child | |
2359 process. | |
2360 | |
Caveat: this implementation doesn't track cwd or initial_cwd. It is because
2362 the Windows Kernel doesn't have a concept of 'current working directory' at | |
2363 all. A Win32 process has a map of current directories, one per drive letter | |
2364 and it is managed by the user mode kernel32.dll. In kernel, a file is always | |
2365 opened relative to another file_object or as an absolute path. All the current | |
2366 working directory logic is done in user mode. | |
2367 """ | |
2368 class Context(ApiBase.Context): | |
2369 """Processes a ETW log line and keeps the list of existent and non | |
2370 existent files accessed. | |
2371 | |
2372 Ignores directories. | |
2373 """ | |
2374 # These indexes are for the stripped version in json. | |
2375 EVENT_NAME = 0 | |
2376 TYPE = 1 | |
2377 PID = 2 | |
2378 TID = 3 | |
2379 PROCESSOR_ID = 4 | |
2380 TIMESTAMP = 5 | |
2381 USER_DATA = 6 | |
2382 | |
class Process(ApiBase.Context.Process):
  def __init__(self, *args):
    """Extends the base process with tracking of open kernel file objects."""
    super(LogmanTrace.Context.Process, self).__init__(*args)
    # Handle file objects that succeeded: maps a FILE_OBJECT value from the
    # ETW log to a (filepath, access_type) tuple until FileIo_Cleanup
    # confirms the open and turns it into an add_file() call.
    self.file_objects = {}
2388 | |
def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd):
  """Sets up the state needed to filter a system-wide ETW log down to the
  process tree started by the thunk script.
  """
  logging.info(
      '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name,
      thunk_cmd)
  super(LogmanTrace.Context, self).__init__(blacklist)
  self._drive_map = file_path.DosDriveMap()
  # Threads mapping to the corresponding process id.
  self._threads_active = {}
  # Process ID of the tracer, e.g. the temporary script created by
  # create_subprocess_thunk(). This is tricky because the process id may
  # have been reused.
  self._thunk_pid = thunk_pid
  # Command line of the thunk script; used to disambiguate reused pids.
  self._thunk_cmd = thunk_cmd
  # Name of the trace being reconstructed from the log.
  self._trace_name = trace_name
  # Number of log lines processed so far; used to annotate failures.
  self._line_number = 0
  # Set once the thunk process itself is seen starting in the log.
  self._thunk_process = None
2405 | |
def on_line(self, line):
  """Processes a json Event line.

  Dispatches to the method named handle_<EventName>_<Type>. Any failure is
  converted into (or annotated as) a TracingFailure carrying the pid, the
  raw line and the line number for easier debugging.
  """
  self._line_number += 1
  try:
    # By Opcode
    handler = getattr(
        self,
        'handle_%s_%s' % (line[self.EVENT_NAME], line[self.TYPE]),
        None)
    if not handler:
      raise TracingFailure(
          'Unexpected event %s_%s' % (
              line[self.EVENT_NAME], line[self.TYPE]),
          None, None, None)
    handler(line)
  except TracingFailure, e:
    # Hack in the values since the handler could be a static function.
    e.pid = line[self.PID]
    e.line = line
    e.line_number = self._line_number
    # Re-raise the modified exception.
    raise
  except (KeyError, NotImplementedError, ValueError), e:
    # Wrap any other parsing error with the same context information.
    raise TracingFailure(
        'Trace generated a %s exception: %s' % (
            e.__class__.__name__, str(e)),
        line[self.PID],
        self._line_number,
        line,
        e)
2436 | |
def to_results(self):
  """Packages the reconstructed process tree into a Results instance."""
  if not self.root_process:
    raise TracingFailure(
        'Failed to detect the initial process %d' % self._thunk_pid,
        None, None, None)
  return Results(self.root_process.to_results_process())
2444 | |
2445 def _thread_to_process(self, tid): | |
2446 """Finds the process from the thread id.""" | |
2447 tid = int(tid, 16) | |
2448 pid = self._threads_active.get(tid) | |
2449 if not pid or not self._process_lookup.get(pid): | |
2450 return | |
2451 return self._process_lookup[pid] | |
2452 | |
@classmethod
def handle_EventTrace_Header(cls, line):
  """Verifies no event was dropped, e.g. no buffer overrun occured.

  The commented-out constants document the remaining field offsets of the
  EventTrace_Header record; only BUFFER_SIZE and EVENTS_LOST are used.
  """
  BUFFER_SIZE = cls.USER_DATA
  #VERSION = cls.USER_DATA + 1
  #PROVIDER_VERSION = cls.USER_DATA + 2
  #NUMBER_OF_PROCESSORS = cls.USER_DATA + 3
  #END_TIME = cls.USER_DATA + 4
  #TIMER_RESOLUTION = cls.USER_DATA + 5
  #MAX_FILE_SIZE = cls.USER_DATA + 6
  #LOG_FILE_MODE = cls.USER_DATA + 7
  #BUFFERS_WRITTEN = cls.USER_DATA + 8
  #START_BUFFERS = cls.USER_DATA + 9
  #POINTER_SIZE = cls.USER_DATA + 10
  EVENTS_LOST = cls.USER_DATA + 11
  #CPU_SPEED = cls.USER_DATA + 12
  #LOGGER_NAME = cls.USER_DATA + 13
  #LOG_FILE_NAME = cls.USER_DATA + 14
  #BOOT_TIME = cls.USER_DATA + 15
  #PERF_FREQ = cls.USER_DATA + 16
  #START_TIME = cls.USER_DATA + 17
  #RESERVED_FLAGS = cls.USER_DATA + 18
  #BUFFERS_LOST = cls.USER_DATA + 19
  #SESSION_NAME_STRING = cls.USER_DATA + 20
  #LOG_FILE_NAME_STRING = cls.USER_DATA + 21
  if line[EVENTS_LOST] != '0':
    raise TracingFailure(
        ( '%s events were lost during trace, please increase the buffer '
          'size from %s') % (line[EVENTS_LOST], line[BUFFER_SIZE]),
        None, None, None)
2483 | |
def handle_FileIo_Cleanup(self, line):
  """General wisdom: if a file is closed, it's because it was opened.

  Note that FileIo_Close is not used since if a file was opened properly
  but not closed before the process exits, only Cleanup will be logged.
  """
  #IRP = self.USER_DATA
  TTID = self.USER_DATA + 1  # Thread ID, that's what we want.
  FILE_OBJECT = self.USER_DATA + 2
  #FILE_KEY = self.USER_DATA + 3
  proc = self._thread_to_process(line[TTID])
  if not proc:
    # Not a process we care about.
    return
  # A pending entry confirms the CreateFile() succeeded; record it now.
  pending = proc.file_objects.pop(line[FILE_OBJECT], None)
  if pending is not None:
    proc.add_file(pending[0], pending[1])
2502 | |
def handle_FileIo_Create(self, line):
  """Handles a file open event.

  All FileIo events are described at
  http://msdn.microsoft.com/library/windows/desktop/aa363884.aspx
  for some value of 'description'.

  " (..) process and thread id values of the IO events (..) are not valid "
  http://msdn.microsoft.com/magazine/ee358703.aspx

  The FileIo.Create event doesn't return if the CreateFile() call
  succeeded, so keep track of the file_object and check that it is
  eventually closed with FileIo_Cleanup.
  """
  #IRP = self.USER_DATA
  TTID = self.USER_DATA + 1  # Thread ID, that's what we want.
  FILE_OBJECT = self.USER_DATA + 2
  #CREATE_OPTIONS = self.USER_DATA + 3
  #FILE_ATTRIBUTES = self.USER_DATA + 4
  #SHARE_ACCESS = self.USER_DATA + 5
  OPEN_PATH = self.USER_DATA + 6

  proc = self._thread_to_process(line[TTID])
  if not proc:
    # Not a process we care about.
    return

  raw_path = line[OPEN_PATH]
  # A trailing separator denotes a directory or a bare drive; skip it.
  if raw_path.endswith(os.path.sep):
    return
  filepath = self._drive_map.to_win32(raw_path)
  # Some bare drives still fall through with a format like '\\?\X:'; a
  # two-character result can only be such a drive reference.
  if len(filepath) == 2:
    return
  if os.path.isdir(filepath):
    # There is no O_DIRECTORY equivalent on Windows. The closest is
    # FILE_FLAG_BACKUP_SEMANTICS but it's not exactly right either. So
    # simply discard directories as they are found.
    return
  # Override any stale file object.
  # TODO(maruel): Figure out a way to detect if the file was opened for
  # reading or writing. Sadly CREATE_OPTIONS doesn't seem to be of any help
  # here. For now mark as None to make it clear we have no idea what it is
  # about.
  proc.file_objects[line[FILE_OBJECT]] = (filepath, None)
2551 | |
    def handle_FileIo_Rename(self, line):
      """Handles a FileIo Rename event; intentionally a no-op for now.

      Renames are not tracked, so a renamed file keeps being reported under
      the name it was opened with.
      """
      # TODO(maruel): Handle?
      pass
2555 | |
2556 def handle_Process_End(self, line): | |
2557 pid = line[self.PID] | |
2558 if self._process_lookup.get(pid): | |
2559 logging.info('Terminated: %d' % pid) | |
2560 self._process_lookup[pid] = None | |
2561 else: | |
2562 logging.debug('Terminated: %d' % pid) | |
2563 if self._thunk_process and self._thunk_process.pid == pid: | |
2564 self._thunk_process = None | |
2565 | |
    def handle_Process_Start(self, line):
      """Handles a new child process started by PID.

      Registers the process in self._process_lookup when its parent is
      already tracked, or captures the thunk process created by
      create_subprocess_thunk() after confirming its command line.
      """
      # Column offsets inside the event-specific USER_DATA payload.
      #UNIQUE_PROCESS_KEY = self.USER_DATA
      PROCESS_ID = self.USER_DATA + 1
      #PARENT_PID = self.USER_DATA + 2
      #SESSION_ID = self.USER_DATA + 3
      #EXIT_STATUS = self.USER_DATA + 4
      #DIRECTORY_TABLE_BASE = self.USER_DATA + 5
      #USER_SID = self.USER_DATA + 6
      IMAGE_FILE_NAME = self.USER_DATA + 7
      COMMAND_LINE = self.USER_DATA + 8

      # The emitting pid is the parent; the new process id is in the payload,
      # encoded in hexadecimal.
      ppid = line[self.PID]
      pid = int(line[PROCESS_ID], 16)
      command_line = CommandLineToArgvW(line[COMMAND_LINE])
      logging.debug(
          'New process %d->%d (%s) %s' %
          (ppid, pid, line[IMAGE_FILE_NAME], command_line))

      if pid == self._thunk_pid:
        # Need to ignore processes we don't know about because the log is
        # system-wide. self._thunk_pid shall start only one process.
        # This is tricky though because Windows *loves* to reuse process id and
        # it happens often that the process ID of the thunk script created by
        # create_subprocess_thunk() is reused. So just detecting the pid here is
        # not sufficient, we must confirm the command line.
        if command_line[:len(self._thunk_cmd)] != self._thunk_cmd:
          logging.info(
              'Ignoring duplicate pid %d for %s: %s while searching for %s',
              pid, self._trace_name, command_line, self._thunk_cmd)
          return

        # TODO(maruel): The check is quite weak. Add the thunk path.
        if self._thunk_process:
          raise TracingFailure(
              ( 'Parent process is _thunk_pid(%d) but thunk_process(%d) is '
                'already set') % (self._thunk_pid, self._thunk_process.pid),
              None, None, None)
        proc = self.Process(self.blacklist, pid, None)
        self._thunk_process = proc
        return
      elif ppid == self._thunk_pid and self._thunk_process:
        # First process started by the thunk: this is the root of the traced
        # process tree.
        proc = self.Process(self.blacklist, pid, None)
        self.root_process = proc
        ppid = None
      elif self._process_lookup.get(ppid):
        # Parent is tracked: attach the child to it.
        proc = self.Process(self.blacklist, pid, None)
        self._process_lookup[ppid].children.append(proc)
      else:
        # Ignore: not started by a process we track.
        return
      self._process_lookup[pid] = proc

      proc.command = command_line
      proc.executable = line[IMAGE_FILE_NAME]
      # proc.command[0] may be the absolute path of 'executable' but it may be
      # anything else too. If it happens that command[0] ends with executable,
      # use it, otherwise defaults to the base name.
      cmd0 = proc.command[0].lower()
      if not cmd0.endswith('.exe'):
        # TODO(maruel): That's not strictly true either.
        cmd0 += '.exe'
      if cmd0.endswith(proc.executable) and os.path.isfile(cmd0):
        # Fix the path.
        cmd0 = cmd0.replace('/', os.path.sep)
        cmd0 = os.path.normpath(cmd0)
        proc.executable = file_path.get_native_path_case(cmd0)
      logging.info(
          'New child: %s -> %d %s' % (ppid, pid, proc.executable))
2635 | |
2636 def handle_Thread_End(self, line): | |
2637 """Has the same parameters as Thread_Start.""" | |
2638 tid = int(line[self.TID], 16) | |
2639 self._threads_active.pop(tid, None) | |
2640 | |
2641 def handle_Thread_Start(self, line): | |
2642 """Handles a new thread created. | |
2643 | |
2644 Do not use self.PID here since a process' initial thread is created by | |
2645 the parent process. | |
2646 """ | |
2647 PROCESS_ID = self.USER_DATA | |
2648 TTHREAD_ID = self.USER_DATA + 1 | |
2649 #STACK_BASE = self.USER_DATA + 2 | |
2650 #STACK_LIMIT = self.USER_DATA + 3 | |
2651 #USER_STACK_BASE = self.USER_DATA + 4 | |
2652 #USER_STACK_LIMIT = self.USER_DATA + 5 | |
2653 #AFFINITY = self.USER_DATA + 6 | |
2654 #WIN32_START_ADDR = self.USER_DATA + 7 | |
2655 #TEB_BASE = self.USER_DATA + 8 | |
2656 #SUB_PROCESS_TAG = self.USER_DATA + 9 | |
2657 #BASE_PRIORITY = self.USER_DATA + 10 | |
2658 #PAGE_PRIORITY = self.USER_DATA + 11 | |
2659 #IO_PRIORITY = self.USER_DATA + 12 | |
2660 #THREAD_FLAGS = self.USER_DATA + 13 | |
2661 # Do not use self.PID here since a process' initial thread is created by | |
2662 # the parent process. | |
2663 pid = int(line[PROCESS_ID], 16) | |
2664 tid = int(line[TTHREAD_ID], 16) | |
2665 logging.debug('New thread pid:%d, tid:%d' % (pid, tid)) | |
2666 self._threads_active[tid] = pid | |
2667 | |
2668 @classmethod | |
2669 def supported_events(cls): | |
2670 """Returns all the procesed events.""" | |
2671 out = [] | |
2672 for member in dir(cls): | |
2673 match = re.match(r'^handle_([A-Za-z]+)_([A-Za-z]+)$', member) | |
2674 if match: | |
2675 out.append(match.groups()) | |
2676 return out | |
2677 | |
  class Tracer(ApiBase.Tracer):
    """Drives a 'NT Kernel Logger' trace session with logman.exe.

    The binary .etl log produced by the kernel is converted to CSV with
    tracerpt.exe, then trimmed into a '.preprocessed' file for parsing.
    """
    # The basic headers, as printed by tracerpt.exe in CSV mode.
    EXPECTED_HEADER = [
      u'Event Name',
      u'Type',
      u'Event ID',
      u'Version',
      u'Channel',
      u'Level',  # 5
      u'Opcode',
      u'Task',
      u'Keyword',
      u'PID',
      u'TID',  # 10
      u'Processor Number',
      u'Instance ID',
      u'Parent Instance ID',
      u'Activity ID',
      u'Related Activity ID',  # 15
      u'Clock-Time',
      u'Kernel(ms)',  # Both have a resolution of ~15ms which makes them
      u'User(ms)',  # pretty much useless.
      u'User Data',  # Extra arguments that are event-specific.
    ]
    # Only the useful headers common to all entries are listed there. Any column
    # at 19 or higher is dependent on the specific event.
    EVENT_NAME = 0
    TYPE = 1
    PID = 9
    TID = 10
    PROCESSOR_ID = 11
    TIMESTAMP = 16
    # GUID printed when an event carries no activity id.
    NULL_GUID = '{00000000-0000-0000-0000-000000000000}'
    # Index of the first event-specific column.
    USER_DATA = 19

    class CsvReader(object):
      """CSV reader that reads files generated by tracerpt.exe.

      csv.reader() fails to read them properly, it mangles file names quoted
      with "" with a comma in it.

      Implemented as a per-character state machine; the states are the class
      constants below.
      """
      # 0. Had a ',' or one of the following ' ' after a comma, next should
      # be ' ', '"' or string or ',' for an empty field.
      ( HAD_DELIMITER,
        # 1. Processing an unquoted field up to ','.
        IN_STR,
        # 2. Processing a new field starting with '"'.
        STARTING_STR_QUOTED,
        # 3. Second quote in a row at the start of a field. It could be either
        # '""foo""' or '""'. Who the hell thought it was a great idea to use
        # the same character for delimiting and escaping?
        STARTING_SECOND_QUOTE,
        # 4. A quote inside a quoted string where the previous character was
        # not a quote, so the string is not empty. Can be either: end of a
        # quoted string (a delimiter) or a quote escape. The next char must be
        # either '"' or ','.
        HAD_QUOTE_IN_QUOTED,
        # 5. Second quote inside a quoted string.
        HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED,
        # 6. Processing a field that started with '"'.
        IN_STR_QUOTED) = range(7)

      def __init__(self, f):
        # f: file-like object; one CSV record per readline() call.
        self.f = f

      def __iter__(self):
        return self

      def next(self):
        """Splits the line in fields.

        Python 2 iterator protocol entry point; raises StopIteration at EOF.
        """
        line = self.f.readline()
        if not line:
          raise StopIteration()
        line = line.strip()
        fields = []
        state = self.HAD_DELIMITER
        for i, c in enumerate(line):
          if state == self.HAD_DELIMITER:
            if c == ',':
              # Empty field.
              fields.append('')
            elif c == ' ':
              # Ignore initial whitespaces
              pass
            elif c == '"':
              state = self.STARTING_STR_QUOTED
              fields.append('')
            else:
              # Start of a new field.
              state = self.IN_STR
              fields.append(c)

          elif state == self.IN_STR:
            # Do not accept quote inside unquoted field.
            assert c != '"', (i, c, line, fields)
            if c == ',':
              fields[-1] = fields[-1].strip()
              state = self.HAD_DELIMITER
            else:
              fields[-1] = fields[-1] + c

          elif state == self.STARTING_STR_QUOTED:
            if c == '"':
              # Do not store the character yet.
              state = self.STARTING_SECOND_QUOTE
            else:
              state = self.IN_STR_QUOTED
              fields[-1] = fields[-1] + c

          elif state == self.STARTING_SECOND_QUOTE:
            if c == ',':
              # It was an empty field. '""' == ''.
              state = self.HAD_DELIMITER
            else:
              # The '""' pair was an escaped quote at the start of the field.
              fields[-1] = fields[-1] + '"' + c
              state = self.IN_STR_QUOTED

          elif state == self.HAD_QUOTE_IN_QUOTED:
            if c == ',':
              # End of the string.
              state = self.HAD_DELIMITER
            elif c == '"':
              state = self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED
            else:
              # The previous double-quote was just an unescaped quote.
              fields[-1] = fields[-1] + '"' + c
              state = self.IN_STR_QUOTED

          elif state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
            if c == ',':
              # End of the string.
              state = self.HAD_DELIMITER
              fields[-1] = fields[-1] + '"'
            else:
              # That's just how the logger rolls. Revert back to appending the
              # char and "guess" it was a quote in a double-quoted string.
              state = self.IN_STR_QUOTED
              fields[-1] = fields[-1] + '"' + c

          elif state == self.IN_STR_QUOTED:
            if c == '"':
              # Could be a delimiter or an escape.
              state = self.HAD_QUOTE_IN_QUOTED
            else:
              fields[-1] = fields[-1] + c

        # End of line: flush the trailing quote or sanity-check the final
        # state.
        if state == self.HAD_SECOND_QUOTE_IN_A_ROW_IN_QUOTED:
          fields[-1] = fields[-1] + '"'
        else:
          assert state in (
              # Terminated with a normal field.
              self.IN_STR,
              # Terminated with an empty field.
              self.STARTING_SECOND_QUOTE,
              # Terminated with a normal quoted field.
              self.HAD_QUOTE_IN_QUOTED), (
              line, state, fields)
        return fields

    def __init__(self, logname):
      """Starts the log collection.

      Requires administrative access. logman.exe is synchronous so no need for a
      "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID
      instead. The GUID constant name is SystemTraceControlGuid. Lovely.

      One can get the list of potentially interesting providers with:
      "logman query providers | findstr /i file"
      """
      super(LogmanTrace.Tracer, self).__init__(logname)
      # Thunk script so traced children have a known, owned parent pid.
      self._signal_script = create_subprocess_thunk()
      self._scripts_to_cleanup.append(self._signal_script)
      cmd_start = [
        'logman.exe',
        'start',
        'NT Kernel Logger',
        '-p', '{9e814aad-3204-11d2-9a82-006008a86939}',
        # splitio,fileiocompletion,syscall,file,cswitch,img
        '(process,fileio,thread)',
        '-o', self._logname + '.etl',
        '-ets',  # Send directly to kernel
        # Values extracted out of thin air.
        # Event Trace Session buffer size in kb.
        '-bs', '10240',
        # Number of Event Trace Session buffers.
        '-nb', '16', '256',
      ]
      logging.debug('Running: %s' % cmd_start)
      try:
        subprocess.check_call(
            cmd_start,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT)
      except subprocess.CalledProcessError, e:
        # NOTE(review): the two constants below look like HRESULT error codes
        # for access-denied and session-already-running -- confirm.
        if e.returncode == -2147024891:
          print >> sys.stderr, 'Please restart with an elevated admin prompt'
        elif e.returncode == -2144337737:
          print >> sys.stderr, (
              'A kernel trace was already running, stop it and try again')
        raise

    def trace(self, cmd, cwd, tracename, output):
      """Runs |cmd| through the thunk script and records the trace metadata.

      Returns (returncode, output) of the child process.
      """
      logging.info('trace(%s, %s, %s, %s)' % (cmd, cwd, tracename, output))
      assert os.path.isabs(cmd[0]), cmd[0]
      assert os.path.isabs(cwd), cwd
      assert os.path.normpath(cwd) == cwd, cwd
      with self._lock:
        if not self._initialized:
          raise TracingFailure(
              'Called Tracer.trace() on an unitialized object',
              None, None, None, tracename)
        assert tracename not in (i['trace'] for i in self._traces)

      # Use "logman -?" for help.

      stdout = stderr = None
      if output:
        stdout = subprocess.PIPE
        stderr = subprocess.STDOUT

      # Run the child process.
      logging.debug('Running: %s' % cmd)
      # Use the temporary script generated with create_subprocess_thunk() so we
      # have a clear pid owner. Since trace_inputs.py can be used as a library
      # and could trace multiple processes simultaneously, it makes it more
      # complex if the executable to be traced is executed directly here. It
      # also solves issues related to logman.exe that needs to be executed to
      # control the kernel trace.
      child_cmd = [
        sys.executable,
        self._signal_script,
        tracename,
      ]
      child = subprocess.Popen(
          child_cmd + tools.fix_python_path(cmd),
          cwd=cwd,
          stdin=subprocess.PIPE,
          stdout=stdout,
          stderr=stderr)
      logging.debug('Started child pid: %d' % child.pid)
      out = child.communicate()[0]
      # This doesn't mean all the grand-children are done. Sadly, we don't have
      # a good way to determine that.

      with self._lock:
        assert tracename not in (i['trace'] for i in self._traces)
        self._traces.append({
          'cmd': cmd,
          'cwd': cwd,
          'output': out,
          'pid': child.pid,
          # Used to figure out the real process when process ids are reused.
          'thunk_cmd': child_cmd,
          'trace': tracename,
        })

      return child.returncode, out

    def close(self, _timeout=None):
      """Stops the kernel log collection and converts the traces to text
      representation.

      The logman stop runs in a finally block so the kernel session is torn
      down even if the base close() fails.
      """
      with self._lock:
        try:
          super(LogmanTrace.Tracer, self).close()
        finally:
          cmd_stop = [
            'logman.exe',
            'stop',
            'NT Kernel Logger',
            '-ets',  # Sends the command directly to the kernel.
          ]
          logging.debug('Running: %s' % cmd_stop)
          subprocess.check_call(
              cmd_stop,
              stdin=subprocess.PIPE,
              stdout=subprocess.PIPE,
              stderr=subprocess.STDOUT)

    def post_process_log(self):
      """Converts the .etl file into .csv then into .json."""
      super(LogmanTrace.Tracer, self).post_process_log()
      logformat = 'csv'
      self._convert_log(logformat)
      self._trim_log(logformat)

    def _gen_logdata(self):
      # Metadata persisted alongside the log; consumed by parse_log().
      return {
        'format': 'csv',
        'traces': self._traces,
      }

    def _trim_log(self, logformat):
      """Reduces the amount of data in original log by generating a 'reduced'
      log.
      """
      # NOTE(review): file_handle is left unbound when logformat is neither
      # 'csv_utf16' nor 'csv'; post_process_log() only ever passes 'csv'.
      if logformat == 'csv_utf16':
        file_handle = codecs.open(
            self._logname + '.' + logformat, 'r', encoding='utf-16')

      elif logformat == 'csv':
        assert sys.getfilesystemencoding() == 'mbcs'
        file_handle = codecs.open(
            self._logname + '.' + logformat, 'r',
            encoding=get_current_encoding())

      supported_events = LogmanTrace.Context.supported_events()

      def trim(generator):
        """Loads items from the generator and returns the interesting data.

        It filters out any uninteresting line and reduce the amount of data in
        the trace.
        """
        for index, line in enumerate(generator):
          if not index:
            if line != self.EXPECTED_HEADER:
              raise TracingFailure(
                  'Found malformed header: %s' % line,
                  None, None, None)
            continue
          # As you can see, the CSV is full of useful non-redundant information:
          if (line[2] != '0' or  # Event ID
              line[3] not in ('2', '3') or  # Version
              line[4] != '0' or  # Channel
              line[5] != '0' or  # Level
              line[7] != '0' or  # Task
              line[8] != '0x0000000000000000' or  # Keyword
              line[12] != '' or  # Instance ID
              line[13] != '' or  # Parent Instance ID
              line[14] != self.NULL_GUID or  # Activity ID
              line[15] != ''):  # Related Activity ID
            raise TracingFailure(
                'Found unexpected values in line: %s' % ' '.join(line),
                None, None, None)

          if (line[self.EVENT_NAME], line[self.TYPE]) not in supported_events:
            continue

          yield [
              line[self.EVENT_NAME],
              line[self.TYPE],
              line[self.PID],
              line[self.TID],
              line[self.PROCESSOR_ID],
              line[self.TIMESTAMP],
          ] + line[self.USER_DATA:]

      # must not convert the trim() call into a list, since it will use too much
      # memory for large trace. use a csv file as a workaround since the json
      # parser requires a complete in-memory file.
      with open('%s.preprocessed' % self._logname, 'wb') as f:
        # $ and * can't be used in file name on windows, reducing the likelihood
        # of having to escape a string.
        out = csv.writer(
            f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
        for line in trim(self.CsvReader(file_handle)):
          out.writerow([s.encode('utf-8') for s in line])

    def _convert_log(self, logformat):
      """Converts the ETL trace to text representation.

      Normally, 'csv' is sufficient. If complex scripts are used (like eastern
      languages), use 'csv_utf16'. If localization gets in the way, use 'xml'.

      Arguments:
        - logformat: Text format to be generated, csv, csv_utf16 or xml.

      Use "tracerpt -?" for help.
      """
      LOCALE_INVARIANT = 0x7F
      # Neutralize localization so tracerpt's output columns are predictable.
      windll.kernel32.SetThreadLocale(LOCALE_INVARIANT)
      cmd_convert = [
        'tracerpt.exe',
        '-l', self._logname + '.etl',
        '-o', self._logname + '.' + logformat,
        '-gmt',  # Use UTC
        '-y',  # No prompt
        # Use -of XML to get the header of each items after column 19, e.g. all
        # the actual headers of 'User Data'.
      ]

      if logformat == 'csv':
        # tracerpt localizes the 'Type' column, for major brainfuck
        # entertainment. I can't imagine any sane reason to do that.
        cmd_convert.extend(['-of', 'CSV'])
      elif logformat == 'csv_utf16':
        # This causes it to use UTF-16, which doubles the log size but ensures
        # the log is readable for non-ASCII characters.
        cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode'])
      elif logformat == 'xml':
        cmd_convert.extend(['-of', 'XML'])
      else:
        raise ValueError('Unexpected log format \'%s\'' % logformat)
      logging.debug('Running: %s' % cmd_convert)
      # This can takes tens of minutes for large logs.
      # Redirects all output to stderr.
      subprocess.check_call(
          cmd_convert,
          stdin=subprocess.PIPE,
          stdout=sys.stderr,
          stderr=sys.stderr)
3081 | |
  def __init__(self, use_sudo=False):  # pylint: disable=W0613
    """Initializes the Windows (ETW/logman) tracing API."""
    super(LogmanTrace, self).__init__()
    # Ignore use_sudo. It's irrelevant on Windows but kept to simplify the API.
3085 | |
3086 @staticmethod | |
3087 def clean_trace(logname): | |
3088 for ext in ('', '.csv', '.etl', '.json', '.xml', '.preprocessed'): | |
3089 if os.path.isfile(logname + ext): | |
3090 os.remove(logname + ext) | |
3091 | |
  @classmethod
  def parse_log(cls, logname, blacklist, trace_name):
    """Parses the preprocessed log and returns one result dict per trace.

    Arguments:
    - logname: absolute path of the trace log metadata file.
    - blacklist: callable rejecting uninteresting file paths.
    - trace_name: if set, only the trace with this name is parsed.
    """
    logging.info('parse_log(%s, ..., %s)', logname, trace_name)
    assert os.path.isabs(logname)

    def blacklist_more(filepath):
      # All the NTFS metadata is in the form x:\$EXTEND or stuff like that.
      return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath)

    # Create a list of (Context, result_dict) tuples. This is necessary because
    # the csv file may be larger than the amount of available memory.
    contexes = [
      (
        cls.Context(
            blacklist_more, item['pid'], item['trace'], item['thunk_cmd']),
        {
          'output': item['output'],
          'trace': item['trace'],
        },
      )
      for item in read_json(logname)['traces']
      if not trace_name or item['trace'] == trace_name
    ]

    # The log may be too large to fit in memory and it is not efficient to read
    # it multiple times, so multiplex the contexes instead, which is slightly
    # more awkward.
    with open('%s.preprocessed' % logname, 'rb') as f:
      lines = csv.reader(
          f, delimiter='$', quotechar='*', quoting=csv.QUOTE_MINIMAL)
      for encoded in lines:
        line = [s.decode('utf-8') for s in encoded]
        # Convert the PID in-place from hex.
        line[cls.Context.PID] = int(line[cls.Context.PID], 16)
        for context in contexes:
          if 'exception' in context[1]:
            # This context already failed; stop feeding it events.
            continue
          try:
            context[0].on_line(line)
          except TracingFailure:
            # Record the failure but keep processing the other traces.
            context[1]['exception'] = sys.exc_info()

    # Finalize the contexts that survived into Results instances.
    for context in contexes:
      if 'exception' in context[1]:
        continue
      context[1]['results'] = context[0].to_results()

    return [context[1] for context in contexes]
3140 | |
3141 | |
def get_api(**kwargs):
  """Returns the correct implementation for the current OS."""
  if sys.platform == 'cygwin':
    raise NotImplementedError(
        'Not implemented for cygwin, start the script from Win32 python')
  # ETW on Windows, Dtrace on OSX/Solaris/FreeBSD, strace everywhere else.
  if sys.platform == 'win32':
    implementation = LogmanTrace
  elif sys.platform in ('darwin', 'sunos5', 'freebsd7', 'freebsd8'):
    implementation = Dtrace
  else:
    implementation = Strace
  return implementation(**kwargs)
3156 | |
3157 | |
def extract_directories(root_dir, files, blacklist):
  """Detects if all the files in a directory are in |files| and if so, replace
  the individual files by a Results.Directory instance.

  Takes a list of Results.File instances and returns a shorter list of
  Results.File and Results.Directory instances.

  Arguments:
    - root_dir: Optional base directory that shouldn't be search further.
    - files: list of Results.File instances.
    - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'.
  """
  logging.info(
      'extract_directories(%s, %d files, ...)' % (root_dir, len(files)))
  assert not (root_dir or '').endswith(os.path.sep), root_dir
  # It is important for root_dir to not be a symlinked path, make sure to call
  # os.path.realpath() as needed.
  assert not root_dir or (
      os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir)
  assert not any(isinstance(f, Results.Directory) for f in files)
  # Remove non existent files.
  files = [f for f in files if f.existent]
  if not files:
    return files
  # All files must share the same root, which can be None.
  assert len(set(f.root for f in files)) == 1, set(f.root for f in files)

  # Creates a {directory: {filename: File}} mapping, up to root.
  buckets = {}
  if root_dir:
    buckets[root_dir] = {}
  for fileobj in files:
    path = fileobj.full_path
    directory = os.path.dirname(path)
    assert directory
    # Do not use os.path.basename() so trailing os.path.sep is kept.
    basename = path[len(directory)+1:]
    files_in_directory = buckets.setdefault(directory, {})
    files_in_directory[basename] = fileobj
    # Add all the directories recursively up to root.
    while True:
      old_d = directory
      directory = os.path.dirname(directory)
      if directory + os.path.sep == root_dir or directory == old_d:
        break
      buckets.setdefault(directory, {})

  root_prefix = len(root_dir) + 1 if root_dir else 0
  # Process deepest directories first so a collapsed child appears as a single
  # entry in its parent's bucket before the parent itself is considered.
  for directory in sorted(buckets, reverse=True):
    if not os.path.isdir(directory):
      logging.debug(
          '%s was a directory but doesn\'t exist anymore; ignoring', directory)
      continue
    actual = set(f for f in os.listdir(directory) if not blacklist(f))
    expected = set(buckets[directory])
    if not (actual - expected):
      # Every non-blacklisted file on disk is in |files|: collapse the whole
      # directory into a single Results.Directory entry in the parent bucket.
      parent = os.path.dirname(directory)
      buckets[parent][os.path.basename(directory)] = Results.Directory(
          root_dir,
          directory[root_prefix:],
          False,
          sum(f.size for f in buckets[directory].itervalues()),
          sum(f.nb_files for f in buckets[directory].itervalues()))
      # Remove the whole bucket.
      del buckets[directory]

  # Reverse the mapping with what remains. The original instances are returned,
  # so the cached meta data is kept.
  files = sum((x.values() for x in buckets.itervalues()), [])
  return sorted(files, key=lambda x: x.path)
3228 | |
3229 | |
def trace(logfile, cmd, cwd, api, output):
  """Traces an executable. Returns (returncode, output) from api.

  Arguments:
  - logfile: file to write to.
  - cmd: command to run.
  - cwd: current directory to start the process in.
  - api: a tracing api instance.
  - output: if True, returns output, otherwise prints it at the console.
  """
  fixed_cmd = tools.fix_python_path(cmd)
  # Start from a clean slate so stale logs cannot pollute the results.
  api.clean_trace(logfile)
  with api.get_tracer(logfile) as tracer:
    return tracer.trace(fixed_cmd, cwd, 'default', output)
3244 | |
3245 | |
def CMDclean(parser, args):
  """Cleans up traces."""
  options, _ = parser.parse_args(args)
  get_api().clean_trace(options.log)
  return 0
3252 | |
3253 | |
def CMDtrace(parser, args):
  """Traces an executable.

  Returns the traced command's exit code.
  """
  parser.allow_interspersed_args = False
  parser.add_option(
      '-q', '--quiet', action='store_true',
      help='Redirects traced executable output to /dev/null')
  parser.add_option(
      '-s', '--sudo', action='store_true',
      help='Use sudo when shelling out the tracer tool (ignored on Windows)')
  parser.add_option(
      # dest='sudo' is required: without it optparse derives the destination
      # 'no_sudo' from the long flag, so '--no-sudo' set options.no_sudo and
      # left options.sudo untouched, making the flag a no-op.
      '-n', '--no-sudo', action='store_false', dest='sudo',
      help='Don\'t use sudo')
  options, args = parser.parse_args(args)

  if not args:
    parser.error('Please provide a command to run')

  # Make a relative executable path absolute so the tracer can find it
  # regardless of the working directory it uses.
  if not os.path.isabs(args[0]) and os.access(args[0], os.X_OK):
    args[0] = os.path.abspath(args[0])

  # options.sudo default value is None, which is to do whatever tracer defaults
  # do.
  api = get_api(use_sudo=options.sudo)
  return trace(options.log, args, os.getcwd(), api, options.quiet)[0]
3278 | |
3279 | |
def CMDread(parser, args):
  """Reads the logs and prints the result.

  Prints either a human readable summary per trace or, with --json, the raw
  flattened results. Returns 0 on success, 1 on interruption or broken pipe.
  """
  parser.add_option(
      '-V', '--variable',
      nargs=2,
      action='append',
      dest='variables',
      metavar='VAR_NAME directory',
      default=[],
      # Fixed help text: was 'all occurence of'.
      help=('Variables to replace relative directories against. Example: '
            '"-v \'$HOME\' \'/home/%s\'" will replace all occurrences of your '
            'home dir with $HOME') % getpass.getuser())
  parser.add_option(
      '--root-dir',
      # Fixed help text: previously read '... outside of this this directory'.
      help='Root directory to base everything off it. Anything outside of '
           'this directory will not be reported')
  parser.add_option(
      '--trace-name',
      help='Only reads one of the trace. Defaults to reading all traces')
  parser.add_option(
      '-j', '--json', action='store_true',
      help='Outputs raw result data as json')
  parser.add_option(
      '--trace-blacklist', action='append', default=[],
      help='List of regexp to use as blacklist filter')
  options, args = parser.parse_args(args)

  if options.root_dir:
    # Normalize to the native path case so comparisons with traced paths work.
    options.root_dir = file_path.get_native_path_case(
        unicode(os.path.abspath(options.root_dir)))

  variables = dict(options.variables)
  api = get_api()
  blacklist = gen_blacklist(options.trace_blacklist)
  data = api.parse_log(options.log, blacklist, options.trace_name)
  # Process each trace.
  output_as_json = []
  try:
    for item in data:
      if 'exception' in item:
        # Do not abort the other traces.
        print >> sys.stderr, (
            'Trace %s: Got an exception: %s' % (
              item['trace'], item['exception'][1]))
        continue
      results = item['results']
      if options.root_dir:
        results = results.strip_root(options.root_dir)

      if options.json:
        output_as_json.append(results.flatten())
      else:
        simplified = extract_directories(
            options.root_dir, results.files, blacklist)
        simplified = [f.replace_variables(variables) for f in simplified]
        if len(data) > 1:
          print('Trace: %s' % item['trace'])
        print('Total: %d' % len(results.files))
        print('Non existent: %d' % len(results.non_existent))
        for f in results.non_existent:
          print(' %s' % f.path)
        print(
            'Interesting: %d reduced to %d' % (
              len(results.existent), len(simplified)))
        for f in simplified:
          print(' %s' % f.path)

    if options.json:
      write_json(sys.stdout, output_as_json, False)
  except KeyboardInterrupt:
    return 1
  except IOError as e:
    if e.errno == errno.EPIPE:
      # Do not print a stack trace when the output is piped to less and the user
      # quits before the whole output was written.
      return 1
    raise
  return 0
3358 | |
3359 | |
class OptionParserTraceInputs(tools.OptionParserWithLogging):
  """Adds automatic --log handling."""

  # Disable --log-file options since both --log and --log-file options are
  # confusing.
  # TODO(vadimsh): Rename --log-file or --log to something else.
  enable_log_file = False

  def __init__(self, **kwargs):
    """Registers the mandatory -l/--log option on top of the base options."""
    tools.OptionParserWithLogging.__init__(self, **kwargs)
    self.add_option(
        '-l', '--log', help='Log file to generate or read, required')

  def parse_args(self, *args, **kwargs):
    """Makes sure the paths make sense.

    On Windows, / and \ are often mixed together in a path.

    Errors out when --log is missing; otherwise normalizes options.log to an
    absolute path.
    """
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    if not options.log:
      self.error('Must supply a log file with -l')
    options.log = os.path.abspath(options.log)
    return options, args
3384 | |
3385 | |
3386 def main(argv): | |
3387 dispatcher = subcommand.CommandDispatcher(__name__) | |
3388 try: | |
3389 return dispatcher.execute( | |
3390 OptionParserTraceInputs(version=__version__), argv) | |
3391 except TracingFailure, e: | |
3392 sys.stderr.write('\nError: ') | |
3393 sys.stderr.write(str(e)) | |
3394 sys.stderr.write('\n') | |
3395 return 1 | |
3396 | |
3397 | |
if __name__ == '__main__':
  # Normalize console encoding/buffering and enable ANSI colors before any
  # output is produced, then dispatch to the subcommand handler.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))
OLD | NEW |