| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # coding=utf-8 | 2 # coding=utf-8 |
| 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
| 6 | 6 |
| 7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
| 8 by them. | 8 by them. |
| 9 | 9 |
| 10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
| (...skipping 15 matching lines...) Expand all Loading... |
| 26 import json | 26 import json |
| 27 import logging | 27 import logging |
| 28 import os | 28 import os |
| 29 import re | 29 import re |
| 30 import stat | 30 import stat |
| 31 import subprocess | 31 import subprocess |
| 32 import sys | 32 import sys |
| 33 import tempfile | 33 import tempfile |
| 34 import threading | 34 import threading |
| 35 import time | 35 import time |
| 36 import unicodedata | |
| 37 import weakref | 36 import weakref |
| 38 | 37 |
| 39 from third_party import colorama | 38 from third_party import colorama |
| 40 from third_party.depot_tools import fix_encoding | 39 from third_party.depot_tools import fix_encoding |
| 41 from third_party.depot_tools import subcommand | 40 from third_party.depot_tools import subcommand |
| 42 | 41 |
| 42 from utils import file_path |
| 43 from utils import tools | 43 from utils import tools |
| 44 | 44 |
| 45 ## OS-specific imports | 45 ## OS-specific imports |
| 46 | 46 |
| 47 if sys.platform == 'win32': | 47 if sys.platform == 'win32': |
| 48 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p | 48 from ctypes.wintypes import byref, c_int, c_wchar_p |
| 49 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611 | 49 from ctypes.wintypes import windll # pylint: disable=E0611 |
| 50 from ctypes.wintypes import GetLastError # pylint: disable=E0611 | |
| 51 elif sys.platform == 'darwin': | |
| 52 import Carbon.File # pylint: disable=F0401 | |
| 53 import MacOS # pylint: disable=F0401 | |
| 54 | 50 |
| 55 | 51 |
| 56 __version__ = '0.1' | 52 __version__ = '0.1' |
| 57 | 53 |
| 58 | 54 |
| 59 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | 55 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
| 60 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) | 56 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) |
| 61 | 57 |
| 62 | 58 |
| 63 class TracingFailure(Exception): | 59 class TracingFailure(Exception): |
| (...skipping 16 matching lines...) Expand all Loading... |
| 80 if self.line: | 76 if self.line: |
| 81 out += '\n%s' % self.line | 77 out += '\n%s' % self.line |
| 82 if self.extra: | 78 if self.extra: |
| 83 out += '\n' + ', '.join(map(str, filter(None, self.extra))) | 79 out += '\n' + ', '.join(map(str, filter(None, self.extra))) |
| 84 return out | 80 return out |
| 85 | 81 |
| 86 | 82 |
| 87 ## OS-specific functions | 83 ## OS-specific functions |
| 88 | 84 |
| 89 if sys.platform == 'win32': | 85 if sys.platform == 'win32': |
| 90 def QueryDosDevice(drive_letter): | |
| 91 """Returns the Windows 'native' path for a DOS drive letter.""" | |
| 92 assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter | |
| 93 assert isinstance(drive_letter, unicode) | |
| 94 # Guesswork. QueryDosDeviceW never returns the required number of bytes. | |
| 95 chars = 1024 | |
| 96 drive_letter = drive_letter | |
| 97 p = create_unicode_buffer(chars) | |
| 98 if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars): | |
| 99 err = GetLastError() | |
| 100 if err: | |
| 101 # pylint: disable=E0602 | |
| 102 msg = u'QueryDosDevice(%s): %s (%d)' % ( | |
| 103 drive_letter, FormatError(err), err) | |
| 104 raise WindowsError(err, msg.encode('utf-8')) | |
| 105 return p.value | |
| 106 | |
| 107 | |
| 108 def GetShortPathName(long_path): | |
| 109 """Returns the Windows short path equivalent for a 'long' path.""" | |
| 110 assert isinstance(long_path, unicode), repr(long_path) | |
| 111 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is | |
| 112 # not enforced. | |
| 113 if os.path.isabs(long_path) and not long_path.startswith('\\\\?\\'): | |
| 114 long_path = '\\\\?\\' + long_path | |
| 115 chars = windll.kernel32.GetShortPathNameW(long_path, None, 0) | |
| 116 if chars: | |
| 117 p = create_unicode_buffer(chars) | |
| 118 if windll.kernel32.GetShortPathNameW(long_path, p, chars): | |
| 119 return p.value | |
| 120 | |
| 121 err = GetLastError() | |
| 122 if err: | |
| 123 # pylint: disable=E0602 | |
| 124 msg = u'GetShortPathName(%s): %s (%d)' % ( | |
| 125 long_path, FormatError(err), err) | |
| 126 raise WindowsError(err, msg.encode('utf-8')) | |
| 127 | |
| 128 | |
| 129 def GetLongPathName(short_path): | |
| 130 """Returns the Windows long path equivalent for a 'short' path.""" | |
| 131 assert isinstance(short_path, unicode) | |
| 132 # Adds '\\\\?\\' when given an absolute path so the MAX_PATH (260) limit is | |
| 133 # not enforced. | |
| 134 if os.path.isabs(short_path) and not short_path.startswith('\\\\?\\'): | |
| 135 short_path = '\\\\?\\' + short_path | |
| 136 chars = windll.kernel32.GetLongPathNameW(short_path, None, 0) | |
| 137 if chars: | |
| 138 p = create_unicode_buffer(chars) | |
| 139 if windll.kernel32.GetLongPathNameW(short_path, p, chars): | |
| 140 return p.value | |
| 141 | |
| 142 err = GetLastError() | |
| 143 if err: | |
| 144 # pylint: disable=E0602 | |
| 145 msg = u'GetLongPathName(%s): %s (%d)' % ( | |
| 146 short_path, FormatError(err), err) | |
| 147 raise WindowsError(err, msg.encode('utf-8')) | |
| 148 | |
| 149 | |
| 150 def get_current_encoding(): | 86 def get_current_encoding(): |
| 151 """Returns the 'ANSI' code page associated to the process.""" | 87 """Returns the 'ANSI' code page associated to the process.""" |
| 152 return 'cp%d' % int(windll.kernel32.GetACP()) | 88 return 'cp%d' % int(windll.kernel32.GetACP()) |
| 153 | 89 |
| 154 | 90 |
| 155 class DosDriveMap(object): | |
| 156 """Maps \Device\HarddiskVolumeN to N: on Windows.""" | |
| 157 # Keep one global cache. | |
| 158 _MAPPING = {} | |
| 159 | |
| 160 def __init__(self): | |
| 161 """Lazy loads the cache.""" | |
| 162 if not self._MAPPING: | |
| 163 # This is related to UNC resolver on windows. Ignore that. | |
| 164 self._MAPPING[u'\\Device\\Mup'] = None | |
| 165 self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot'] | |
| 166 | |
| 167 for letter in (chr(l) for l in xrange(ord('C'), ord('Z')+1)): | |
| 168 try: | |
| 169 letter = u'%s:' % letter | |
| 170 mapped = QueryDosDevice(letter) | |
| 171 if mapped in self._MAPPING: | |
| 172 logging.warn( | |
| 173 ('Two drives: \'%s\' and \'%s\', are mapped to the same disk' | |
| 174 '. Drive letters are a user-mode concept and the kernel ' | |
| 175 'traces only have NT path, so all accesses will be ' | |
| 176 'associated with the first drive letter, independent of the ' | |
| 177 'actual letter used by the code') % ( | |
| 178 self._MAPPING[mapped], letter)) | |
| 179 else: | |
| 180 self._MAPPING[mapped] = letter | |
| 181 except WindowsError: # pylint: disable=E0602 | |
| 182 pass | |
| 183 | |
| 184 def to_win32(self, path): | |
| 185 """Converts a native NT path to Win32/DOS compatible path.""" | |
| 186 match = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path) | |
| 187 if not match: | |
| 188 raise ValueError( | |
| 189 'Can\'t convert %s into a Win32 compatible path' % path, | |
| 190 path) | |
| 191 if not match.group(1) in self._MAPPING: | |
| 192 # Unmapped partitions may be accessed by windows for the | |
| 193 # fun of it while the test is running. Discard these. | |
| 194 return None | |
| 195 drive = self._MAPPING[match.group(1)] | |
| 196 if not drive or not match.group(2): | |
| 197 return drive | |
| 198 return drive + match.group(2) | |
| 199 | |
| 200 | |
| 201 def isabs(path): | |
| 202 """Accepts X: as an absolute path, unlike python's os.path.isabs().""" | |
| 203 return os.path.isabs(path) or len(path) == 2 and path[1] == ':' | |
| 204 | |
| 205 | |
| 206 def find_item_native_case(root, item): | |
| 207 """Gets the native path case of a single item based at root_path.""" | |
| 208 if item == '..': | |
| 209 return item | |
| 210 | |
| 211 root = get_native_path_case(root) | |
| 212 return os.path.basename(get_native_path_case(os.path.join(root, item))) | |
| 213 | |
| 214 | |
| 215 def get_native_path_case(p): | |
| 216 """Returns the native path case for an existing file. | |
| 217 | |
| 218 On Windows, removes any leading '\\?\'. | |
| 219 """ | |
| 220 assert isinstance(p, unicode), repr(p) | |
| 221 if not isabs(p): | |
| 222 raise ValueError( | |
| 223 'get_native_path_case(%r): Require an absolute path' % p, p) | |
| 224 | |
| 225 # Make sure it is normalized to os.path.sep. Do not do it here to keep the | |
| 226 # function fast | |
| 227 assert '/' not in p, p | |
| 228 suffix = '' | |
| 229 count = p.count(':') | |
| 230 if count > 1: | |
| 231 # This means it has an alternate-data stream. There could be 3 ':', since | |
| 232 # it could be the $DATA datastream of an ADS. Split the whole ADS suffix | |
| 233 # off and add it back afterward. There is no way to know the native path | |
| 234 # case of an alternate data stream. | |
| 235 items = p.split(':') | |
| 236 p = ':'.join(items[0:2]) | |
| 237 suffix = ''.join(':' + i for i in items[2:]) | |
| 238 | |
| 239 # TODO(maruel): Use os.path.normpath? | |
| 240 if p.endswith('.\\'): | |
| 241 p = p[:-2] | |
| 242 | |
| 243 # Windows used to have an option to turn on case sensitivity on non Win32 | |
| 244 # subsystem but that's out of scope here and isn't supported anymore. | |
| 245 # Go figure why GetShortPathName() is needed. | |
| 246 try: | |
| 247 out = GetLongPathName(GetShortPathName(p)) | |
| 248 except OSError, e: | |
| 249 if e.args[0] in (2, 3, 5): | |
| 250 # The path does not exist. Try to recurse and reconstruct the path. | |
| 251 base = os.path.dirname(p) | |
| 252 rest = os.path.basename(p) | |
| 253 return os.path.join(get_native_path_case(base), rest) | |
| 254 raise | |
| 255 if out.startswith('\\\\?\\'): | |
| 256 out = out[4:] | |
| 257 # Always upper case the first letter since GetLongPathName() will return the | |
| 258 # drive letter in the case it was given. | |
| 259 return out[0].upper() + out[1:] + suffix | |
| 260 | |
| 261 | |
| 262 def CommandLineToArgvW(command_line): | 91 def CommandLineToArgvW(command_line): |
| 263 """Splits a commandline into argv using CommandLineToArgvW().""" | 92 """Splits a commandline into argv using CommandLineToArgvW().""" |
| 264 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx | 93 # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx |
| 265 size = c_int() | 94 size = c_int() |
| 266 assert isinstance(command_line, unicode) | 95 assert isinstance(command_line, unicode) |
| 267 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size)) | 96 ptr = windll.shell32.CommandLineToArgvW(command_line, byref(size)) |
| 268 try: | 97 try: |
| 269 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)] | 98 return [arg for arg in (c_wchar_p * size.value).from_address(ptr)] |
| 270 finally: | 99 finally: |
| 271 windll.kernel32.LocalFree(ptr) | 100 windll.kernel32.LocalFree(ptr) |
| 272 | 101 |
| 273 | 102 |
| 274 elif sys.platform == 'darwin': | |
| 275 | |
| 276 | |
| 277 # On non-windows, keep the stdlib behavior. | |
| 278 isabs = os.path.isabs | |
| 279 | |
| 280 | |
| 281 def _native_case(p): | |
| 282 """Gets the native path case. Warning: this function resolves symlinks.""" | |
| 283 try: | |
| 284 rel_ref, _ = Carbon.File.FSPathMakeRef(p.encode('utf-8')) | |
| 285 # The OSX underlying code uses NFD but python strings are in NFC. This | |
| 286 # will cause issues with os.listdir() for example. Since the dtrace log | |
| 287 # *is* in NFC, normalize it here. | |
| 288 out = unicodedata.normalize( | |
| 289 'NFC', rel_ref.FSRefMakePath().decode('utf-8')) | |
| 290 if p.endswith(os.path.sep) and not out.endswith(os.path.sep): | |
| 291 return out + os.path.sep | |
| 292 return out | |
| 293 except MacOS.Error, e: | |
| 294 if e.args[0] in (-43, -120): | |
| 295 # The path does not exist. Try to recurse and reconstruct the path. | |
| 296 # -43 means file not found. | |
| 297 # -120 means directory not found. | |
| 298 base = os.path.dirname(p) | |
| 299 rest = os.path.basename(p) | |
| 300 return os.path.join(_native_case(base), rest) | |
| 301 raise OSError( | |
| 302 e.args[0], 'Failed to get native path for %s' % p, p, e.args[1]) | |
| 303 | |
| 304 | |
| 305 def _split_at_symlink_native(base_path, rest): | |
| 306 """Returns the native path for a symlink.""" | |
| 307 base, symlink, rest = split_at_symlink(base_path, rest) | |
| 308 if symlink: | |
| 309 if not base_path: | |
| 310 base_path = base | |
| 311 else: | |
| 312 base_path = safe_join(base_path, base) | |
| 313 symlink = find_item_native_case(base_path, symlink) | |
| 314 return base, symlink, rest | |
| 315 | |
| 316 | |
| 317 def find_item_native_case(root_path, item): | |
| 318 """Gets the native path case of a single item based at root_path. | |
| 319 | |
| 320 There is no API to get the native path case of symlinks on OSX. So it | |
| 321 needs to be done the slow way. | |
| 322 """ | |
| 323 if item == '..': | |
| 324 return item | |
| 325 | |
| 326 item = item.lower() | |
| 327 for element in os.listdir(root_path): | |
| 328 if element.lower() == item: | |
| 329 return element | |
| 330 | |
| 331 | |
| 332 def get_native_path_case(path): | |
| 333 """Returns the native path case for an existing file. | |
| 334 | |
| 335 Technically, it's only HFS+ on OSX that is case preserving and | |
| 336 insensitive. It's the default setting on HFS+ but can be changed. | |
| 337 """ | |
| 338 assert isinstance(path, unicode), repr(path) | |
| 339 if not isabs(path): | |
| 340 raise ValueError( | |
| 341 'get_native_path_case(%r): Require an absolute path' % path, path) | |
| 342 if path.startswith('/dev'): | |
| 343 # /dev is not visible from Carbon, causing an exception. | |
| 344 return path | |
| 345 | |
| 346 # Starts assuming there is no symlink along the path. | |
| 347 resolved = _native_case(path) | |
| 348 if path.lower() in (resolved.lower(), resolved.lower() + './'): | |
| 349 # This code path is incredibly faster. | |
| 350 logging.debug('get_native_path_case(%s) = %s' % (path, resolved)) | |
| 351 return resolved | |
| 352 | |
| 353 # There was a symlink, process it. | |
| 354 base, symlink, rest = _split_at_symlink_native(None, path) | |
| 355 assert symlink, (path, base, symlink, rest, resolved) | |
| 356 prev = base | |
| 357 base = safe_join(_native_case(base), symlink) | |
| 358 assert len(base) > len(prev) | |
| 359 while rest: | |
| 360 prev = base | |
| 361 relbase, symlink, rest = _split_at_symlink_native(base, rest) | |
| 362 base = safe_join(base, relbase) | |
| 363 assert len(base) > len(prev), (prev, base, symlink) | |
| 364 if symlink: | |
| 365 base = safe_join(base, symlink) | |
| 366 assert len(base) > len(prev), (prev, base, symlink) | |
| 367 # Make sure no symlink was resolved. | |
| 368 assert base.lower() == path.lower(), (base, path) | |
| 369 logging.debug('get_native_path_case(%s) = %s' % (path, base)) | |
| 370 return base | |
| 371 | |
| 372 | |
| 373 else: # OSes other than Windows and OSX. | |
| 374 | |
| 375 | |
| 376 # On non-windows, keep the stdlib behavior. | |
| 377 isabs = os.path.isabs | |
| 378 | |
| 379 | |
| 380 def find_item_native_case(root, item): | |
| 381 """Gets the native path case of a single item based at root_path.""" | |
| 382 if item == '..': | |
| 383 return item | |
| 384 | |
| 385 root = get_native_path_case(root) | |
| 386 return os.path.basename(get_native_path_case(os.path.join(root, item))) | |
| 387 | |
| 388 | |
| 389 def get_native_path_case(path): | |
| 390 """Returns the native path case for an existing file. | |
| 391 | |
| 392 On OSes other than OSX and Windows, assume the file system is | |
| 393 case-sensitive. | |
| 394 | |
| 395 TODO(maruel): This is not strictly true. Implement if necessary. | |
| 396 """ | |
| 397 assert isinstance(path, unicode), repr(path) | |
| 398 if not isabs(path): | |
| 399 raise ValueError( | |
| 400 'get_native_path_case(%r): Require an absolute path' % path, path) | |
| 401 # Give up on cygwin, as GetLongPathName() can't be called. | |
| 402 # Linux traces tends to not be normalized so use this occasion to normalize | |
| 403 # it. This function implementation already normalizes the path on the other | |
| 404 # OS so this needs to be done here to be coherent between OSes. | |
| 405 out = os.path.normpath(path) | |
| 406 if path.endswith(os.path.sep) and not out.endswith(os.path.sep): | |
| 407 return out + os.path.sep | |
| 408 return out | |
| 409 | |
| 410 | |
| 411 if sys.platform != 'win32': # All non-Windows OSes. | |
| 412 | |
| 413 | |
| 414 def safe_join(*args): | |
| 415 """Joins path elements like os.path.join() but doesn't abort on absolute | |
| 416 path. | |
| 417 | |
| 418 os.path.join('foo', '/bar') == '/bar' | |
| 419 but safe_join('foo', '/bar') == 'foo/bar'. | |
| 420 """ | |
| 421 out = '' | |
| 422 for element in args: | |
| 423 if element.startswith(os.path.sep): | |
| 424 if out.endswith(os.path.sep): | |
| 425 out += element[1:] | |
| 426 else: | |
| 427 out += element | |
| 428 else: | |
| 429 if out.endswith(os.path.sep): | |
| 430 out += element | |
| 431 else: | |
| 432 out += os.path.sep + element | |
| 433 return out | |
| 434 | |
| 435 | |
| 436 def split_at_symlink(base_dir, relfile): | |
| 437 """Scans each component of relfile and cut the string at the symlink if | |
| 438 there is any. | |
| 439 | |
| 440 Returns a tuple (base_path, symlink, rest), with symlink == rest == None if | |
| 441 not symlink was found. | |
| 442 """ | |
| 443 if base_dir: | |
| 444 assert relfile | |
| 445 assert os.path.isabs(base_dir) | |
| 446 index = 0 | |
| 447 else: | |
| 448 assert os.path.isabs(relfile) | |
| 449 index = 1 | |
| 450 | |
| 451 def at_root(rest): | |
| 452 if base_dir: | |
| 453 return safe_join(base_dir, rest) | |
| 454 return rest | |
| 455 | |
| 456 while True: | |
| 457 try: | |
| 458 index = relfile.index(os.path.sep, index) | |
| 459 except ValueError: | |
| 460 index = len(relfile) | |
| 461 full = at_root(relfile[:index]) | |
| 462 if os.path.islink(full): | |
| 463 # A symlink! | |
| 464 base = os.path.dirname(relfile[:index]) | |
| 465 symlink = os.path.basename(relfile[:index]) | |
| 466 rest = relfile[index:] | |
| 467 logging.debug( | |
| 468 'split_at_symlink(%s, %s) -> (%s, %s, %s)' % | |
| 469 (base_dir, relfile, base, symlink, rest)) | |
| 470 return base, symlink, rest | |
| 471 if index == len(relfile): | |
| 472 break | |
| 473 index += 1 | |
| 474 return relfile, None, None | |
| 475 | |
| 476 | 103 |
| 477 def gen_blacklist(regexes): | 104 def gen_blacklist(regexes): |
| 478 """Returns a lambda to be used as a blacklist.""" | 105 """Returns a lambda to be used as a blacklist.""" |
| 479 compiled = [re.compile(i) for i in regexes] | 106 compiled = [re.compile(i) for i in regexes] |
| 480 def match(f): | 107 def match(f): |
| 481 return any(j.match(f) for j in compiled) | 108 return any(j.match(f) for j in compiled) |
| 482 return match | 109 return match |
| 483 | 110 |
| 484 | 111 |
| 485 def create_subprocess_thunk(): | 112 def create_subprocess_thunk(): |
| (...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 703 self.nb_files = nb_files | 330 self.nb_files = nb_files |
| 704 # Can be used as a cache or a default value, depending on context. In | 331 # Can be used as a cache or a default value, depending on context. In |
| 705 # particular, once self.tainted is True, because the path was replaced | 332 # particular, once self.tainted is True, because the path was replaced |
| 706 # with a variable, it is not possible to look up the file size. | 333 # with a variable, it is not possible to look up the file size. |
| 707 self._size = size | 334 self._size = size |
| 708 # These are cache only. | 335 # These are cache only. |
| 709 self._real_path = None | 336 self._real_path = None |
| 710 | 337 |
| 711 # Check internal consistency. | 338 # Check internal consistency. |
| 712 assert path, path | 339 assert path, path |
| 713 assert tainted or bool(root) != bool(isabs(path)), (root, path) | 340 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path) |
| 714 assert tainted or ( | 341 assert tainted or ( |
| 715 not os.path.exists(self.full_path) or | 342 not os.path.exists(self.full_path) or |
| 716 (self.full_path == get_native_path_case(self.full_path))), ( | 343 (self.full_path == file_path.get_native_path_case(self.full_path))), ( |
| 717 tainted, self.full_path, get_native_path_case(self.full_path)) | 344 tainted, |
| 345 self.full_path, |
| 346 file_path.get_native_path_case(self.full_path)) |
| 718 | 347 |
| 719 @property | 348 @property |
| 720 def existent(self): | 349 def existent(self): |
| 721 return self.size != -1 | 350 return self.size != -1 |
| 722 | 351 |
| 723 @property | 352 @property |
| 724 def full_path(self): | 353 def full_path(self): |
| 725 if self.root: | 354 if self.root: |
| 726 return os.path.join(self.root, self.path) | 355 return os.path.join(self.root, self.path) |
| 727 return self.path | 356 return self.path |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 769 # No need to clone, returns ourself. | 398 # No need to clone, returns ourself. |
| 770 return self | 399 return self |
| 771 | 400 |
| 772 def strip_root(self, root): | 401 def strip_root(self, root): |
| 773 """Returns a clone of itself with 'root' stripped off. | 402 """Returns a clone of itself with 'root' stripped off. |
| 774 | 403 |
| 775 Note that the file is kept if it is either accessible from a symlinked | 404 Note that the file is kept if it is either accessible from a symlinked |
| 776 path that was used to access the file or through the real path. | 405 path that was used to access the file or through the real path. |
| 777 """ | 406 """ |
| 778 # Check internal consistency. | 407 # Check internal consistency. |
| 779 assert self.tainted or (isabs(root) and root.endswith(os.path.sep)), root | 408 assert ( |
| 409 self.tainted or |
| 410 (file_path.isabs(root) and root.endswith(os.path.sep))), root |
| 780 if not self.full_path.startswith(root): | 411 if not self.full_path.startswith(root): |
| 781 # Now try to resolve the symlinks to see if it can be reached this way. | 412 # Now try to resolve the symlinks to see if it can be reached this way. |
| 782 # Only try *after* trying without resolving symlink. | 413 # Only try *after* trying without resolving symlink. |
| 783 if not self.real_path.startswith(root): | 414 if not self.real_path.startswith(root): |
| 784 return None | 415 return None |
| 785 path = self.real_path | 416 path = self.real_path |
| 786 else: | 417 else: |
| 787 path = self.full_path | 418 path = self.full_path |
| 788 return self._clone(root, path[len(root):], self.tainted) | 419 return self._clone(root, path[len(root):], self.tainted) |
| 789 | 420 |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 891 return { | 522 return { |
| 892 'children': [c.flatten() for c in self.children], | 523 'children': [c.flatten() for c in self.children], |
| 893 'command': self.command, | 524 'command': self.command, |
| 894 'executable': self.executable, | 525 'executable': self.executable, |
| 895 'files': [f.flatten() for f in self.files], | 526 'files': [f.flatten() for f in self.files], |
| 896 'initial_cwd': self.initial_cwd, | 527 'initial_cwd': self.initial_cwd, |
| 897 'pid': self.pid, | 528 'pid': self.pid, |
| 898 } | 529 } |
| 899 | 530 |
| 900 def strip_root(self, root): | 531 def strip_root(self, root): |
| 901 assert isabs(root) and root.endswith(os.path.sep), root | 532 assert file_path.isabs(root) and root.endswith(os.path.sep), root |
| 902 # Loads the files after since they are constructed as objects. | 533 # Loads the files after since they are constructed as objects. |
| 903 out = self.__class__( | 534 out = self.__class__( |
| 904 self.pid, | 535 self.pid, |
| 905 filter(None, (f.strip_root(root) for f in self.files)), | 536 filter(None, (f.strip_root(root) for f in self.files)), |
| 906 self.executable, | 537 self.executable, |
| 907 self.command, | 538 self.command, |
| 908 self.initial_cwd, | 539 self.initial_cwd, |
| 909 [c.strip_root(root) for c in self.children]) | 540 [c.strip_root(root) for c in self.children]) |
| 910 logging.debug( | 541 logging.debug( |
| 911 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) | 542 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) |
| (...skipping 27 matching lines...) Expand all Loading... |
| 939 | 570 |
| 940 def strip_root(self, root): | 571 def strip_root(self, root): |
| 941 """Returns a clone with all the files outside the directory |root| removed | 572 """Returns a clone with all the files outside the directory |root| removed |
| 942 and converts all the path to be relative paths. | 573 and converts all the path to be relative paths. |
| 943 | 574 |
| 944 It keeps files accessible through the |root| directory or that have been | 575 It keeps files accessible through the |root| directory or that have been |
| 945 accessed through any symlink which points to the same directory. | 576 accessed through any symlink which points to the same directory. |
| 946 """ | 577 """ |
| 947 # Resolve any symlink | 578 # Resolve any symlink |
| 948 root = os.path.realpath(root) | 579 root = os.path.realpath(root) |
| 949 root = get_native_path_case(root).rstrip(os.path.sep) + os.path.sep | 580 root = ( |
| 581 file_path.get_native_path_case(root).rstrip(os.path.sep) + os.path.sep) |
| 950 logging.debug('strip_root(%s)' % root) | 582 logging.debug('strip_root(%s)' % root) |
| 951 return Results(self.process.strip_root(root)) | 583 return Results(self.process.strip_root(root)) |
| 952 | 584 |
| 953 | 585 |
| 954 class ApiBase(object): | 586 class ApiBase(object): |
| 955 """OS-agnostic API to trace a process and its children.""" | 587 """OS-agnostic API to trace a process and its children.""" |
| 956 class Context(object): | 588 class Context(object): |
| 957 """Processes one log line at a time and keeps the list of traced processes. | 589 """Processes one log line at a time and keeps the list of traced processes. |
| 958 | 590 |
| 959 The parsing is complicated by the fact that logs are traced out of order for | 591 The parsing is complicated by the fact that logs are traced out of order for |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 995 Converts late-bound strings. | 627 Converts late-bound strings. |
| 996 """ | 628 """ |
| 997 if not x: | 629 if not x: |
| 998 # Do not convert None instance to 'None'. | 630 # Do not convert None instance to 'None'. |
| 999 return x | 631 return x |
| 1000 x = render(x) | 632 x = render(x) |
| 1001 if os.path.isabs(x): | 633 if os.path.isabs(x): |
| 1002 # If the path is not absolute, which tends to happen occasionally on | 634 # If the path is not absolute, which tends to happen occasionally on |
| 1003 # Windows, it is not possible to get the native path case so ignore | 635 # Windows, it is not possible to get the native path case so ignore |
| 1004 # that trace. It mostly happens for 'executable' value. | 636 # that trace. It mostly happens for 'executable' value. |
| 1005 x = get_native_path_case(x) | 637 x = file_path.get_native_path_case(x) |
| 1006 return x | 638 return x |
| 1007 | 639 |
| 1008 def fix_and_blacklist_path(x, m): | 640 def fix_and_blacklist_path(x, m): |
| 1009 """Receives a tuple (filepath, mode) and processes filepath.""" | 641 """Receives a tuple (filepath, mode) and processes filepath.""" |
| 1010 x = fix_path(x) | 642 x = fix_path(x) |
| 1011 if not x: | 643 if not x: |
| 1012 return | 644 return |
| 1013 # The blacklist needs to be reapplied, since path casing could | 645 # The blacklist needs to be reapplied, since path casing could |
| 1014 # influence blacklisting. | 646 # influence blacklisting. |
| 1015 if self._blacklist(x): | 647 if self._blacklist(x): |
| (...skipping 1736 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2752 def __init__(self, *args): | 2384 def __init__(self, *args): |
| 2753 super(LogmanTrace.Context.Process, self).__init__(*args) | 2385 super(LogmanTrace.Context.Process, self).__init__(*args) |
| 2754 # Handle file objects that succeeded. | 2386 # Handle file objects that succeeded. |
| 2755 self.file_objects = {} | 2387 self.file_objects = {} |
| 2756 | 2388 |
| 2757 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd): | 2389 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd): |
| 2758 logging.info( | 2390 logging.info( |
| 2759 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name, | 2391 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name, |
| 2760 thunk_cmd) | 2392 thunk_cmd) |
| 2761 super(LogmanTrace.Context, self).__init__(blacklist) | 2393 super(LogmanTrace.Context, self).__init__(blacklist) |
| 2762 self._drive_map = DosDriveMap() | 2394 self._drive_map = file_path.DosDriveMap() |
| 2763 # Threads mapping to the corresponding process id. | 2395 # Threads mapping to the corresponding process id. |
| 2764 self._threads_active = {} | 2396 self._threads_active = {} |
| 2765 # Process ID of the tracer, e.g. the temporary script created by | 2397 # Process ID of the tracer, e.g. the temporary script created by |
| 2766 # create_subprocess_thunk(). This is tricky because the process id may | 2398 # create_subprocess_thunk(). This is tricky because the process id may |
| 2767 # have been reused. | 2399 # have been reused. |
| 2768 self._thunk_pid = thunk_pid | 2400 self._thunk_pid = thunk_pid |
| 2769 self._thunk_cmd = thunk_cmd | 2401 self._thunk_cmd = thunk_cmd |
| 2770 self._trace_name = trace_name | 2402 self._trace_name = trace_name |
| 2771 self._line_number = 0 | 2403 self._line_number = 0 |
| 2772 self._thunk_process = None | 2404 self._thunk_process = None |
| (...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2990 # anything else too. If it happens that command[0] ends with executable, | 2622 # anything else too. If it happens that command[0] ends with executable, |
| 2991 # use it, otherwise defaults to the base name. | 2623 # use it, otherwise defaults to the base name. |
| 2992 cmd0 = proc.command[0].lower() | 2624 cmd0 = proc.command[0].lower() |
| 2993 if not cmd0.endswith('.exe'): | 2625 if not cmd0.endswith('.exe'): |
| 2994 # TODO(maruel): That's not strictly true either. | 2626 # TODO(maruel): That's not strictly true either. |
| 2995 cmd0 += '.exe' | 2627 cmd0 += '.exe' |
| 2996 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0): | 2628 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0): |
| 2997 # Fix the path. | 2629 # Fix the path. |
| 2998 cmd0 = cmd0.replace('/', os.path.sep) | 2630 cmd0 = cmd0.replace('/', os.path.sep) |
| 2999 cmd0 = os.path.normpath(cmd0) | 2631 cmd0 = os.path.normpath(cmd0) |
| 3000 proc.executable = get_native_path_case(cmd0) | 2632 proc.executable = file_path.get_native_path_case(cmd0) |
| 3001 logging.info( | 2633 logging.info( |
| 3002 'New child: %s -> %d %s' % (ppid, pid, proc.executable)) | 2634 'New child: %s -> %d %s' % (ppid, pid, proc.executable)) |
| 3003 | 2635 |
| 3004 def handle_Thread_End(self, line): | 2636 def handle_Thread_End(self, line): |
| 3005 """Has the same parameters as Thread_Start.""" | 2637 """Has the same parameters as Thread_Start.""" |
| 3006 tid = int(line[self.TID], 16) | 2638 tid = int(line[self.TID], 16) |
| 3007 self._threads_active.pop(tid, None) | 2639 self._threads_active.pop(tid, None) |
| 3008 | 2640 |
| 3009 def handle_Thread_Start(self, line): | 2641 def handle_Thread_Start(self, line): |
| 3010 """Handles a new thread created. | 2642 """Handles a new thread created. |
| (...skipping 523 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3534 - root_dir: Optional base directory that shouldn't be searched further. | 3166 - root_dir: Optional base directory that shouldn't be searched further. |
| 3535 - files: list of Results.File instances. | 3167 - files: list of Results.File instances. |
| 3536 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'. | 3168 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'. |
| 3537 """ | 3169 """ |
| 3538 logging.info( | 3170 logging.info( |
| 3539 'extract_directories(%s, %d files, ...)' % (root_dir, len(files))) | 3171 'extract_directories(%s, %d files, ...)' % (root_dir, len(files))) |
| 3540 assert not (root_dir or '').endswith(os.path.sep), root_dir | 3172 assert not (root_dir or '').endswith(os.path.sep), root_dir |
| 3541 # It is important for root_dir to not be a symlinked path, make sure to call | 3173 # It is important for root_dir to not be a symlinked path, make sure to call |
| 3542 # os.path.realpath() as needed. | 3174 # os.path.realpath() as needed. |
| 3543 assert not root_dir or ( | 3175 assert not root_dir or ( |
| 3544 os.path.realpath(get_native_path_case(root_dir)) == root_dir) | 3176 os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir) |
| 3545 assert not any(isinstance(f, Results.Directory) for f in files) | 3177 assert not any(isinstance(f, Results.Directory) for f in files) |
| 3546 # Remove non existent files. | 3178 # Remove non existent files. |
| 3547 files = [f for f in files if f.existent] | 3179 files = [f for f in files if f.existent] |
| 3548 if not files: | 3180 if not files: |
| 3549 return files | 3181 return files |
| 3550 # All files must share the same root, which can be None. | 3182 # All files must share the same root, which can be None. |
| 3551 assert len(set(f.root for f in files)) == 1, set(f.root for f in files) | 3183 assert len(set(f.root for f in files)) == 1, set(f.root for f in files) |
| 3552 | 3184 |
| 3553 # Creates a {directory: {filename: File}} mapping, up to root. | 3185 # Creates a {directory: {filename: File}} mapping, up to root. |
| 3554 buckets = {} | 3186 buckets = {} |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3666 help='Only reads one of the trace. Defaults to reading all traces') | 3298 help='Only reads one of the trace. Defaults to reading all traces') |
| 3667 parser.add_option( | 3299 parser.add_option( |
| 3668 '-j', '--json', action='store_true', | 3300 '-j', '--json', action='store_true', |
| 3669 help='Outputs raw result data as json') | 3301 help='Outputs raw result data as json') |
| 3670 parser.add_option( | 3302 parser.add_option( |
| 3671 '--trace-blacklist', action='append', default=[], | 3303 '--trace-blacklist', action='append', default=[], |
| 3672 help='List of regexp to use as blacklist filter') | 3304 help='List of regexp to use as blacklist filter') |
| 3673 options, args = parser.parse_args(args) | 3305 options, args = parser.parse_args(args) |
| 3674 | 3306 |
| 3675 if options.root_dir: | 3307 if options.root_dir: |
| 3676 options.root_dir = get_native_path_case( | 3308 options.root_dir = file_path.get_native_path_case( |
| 3677 unicode(os.path.abspath(options.root_dir))) | 3309 unicode(os.path.abspath(options.root_dir))) |
| 3678 | 3310 |
| 3679 variables = dict(options.variables) | 3311 variables = dict(options.variables) |
| 3680 api = get_api() | 3312 api = get_api() |
| 3681 blacklist = gen_blacklist(options.trace_blacklist) | 3313 blacklist = gen_blacklist(options.trace_blacklist) |
| 3682 data = api.parse_log(options.log, blacklist, options.trace_name) | 3314 data = api.parse_log(options.log, blacklist, options.trace_name) |
| 3683 # Process each trace. | 3315 # Process each trace. |
| 3684 output_as_json = [] | 3316 output_as_json = [] |
| 3685 try: | 3317 try: |
| 3686 for item in data: | 3318 for item in data: |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3761 sys.stderr.write(str(e)) | 3393 sys.stderr.write(str(e)) |
| 3762 sys.stderr.write('\n') | 3394 sys.stderr.write('\n') |
| 3763 return 1 | 3395 return 1 |
| 3764 | 3396 |
| 3765 | 3397 |
| 3766 if __name__ == '__main__': | 3398 if __name__ == '__main__': |
| 3767 fix_encoding.fix_encoding() | 3399 fix_encoding.fix_encoding() |
| 3768 tools.disable_buffering() | 3400 tools.disable_buffering() |
| 3769 colorama.init() | 3401 colorama.init() |
| 3770 sys.exit(main(sys.argv[1:])) | 3402 sys.exit(main(sys.argv[1:])) |
| OLD | NEW |