OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 15 matching lines...) Expand all Loading... |
26 import json | 26 import json |
27 import logging | 27 import logging |
28 import os | 28 import os |
29 import re | 29 import re |
30 import stat | 30 import stat |
31 import subprocess | 31 import subprocess |
32 import sys | 32 import sys |
33 import tempfile | 33 import tempfile |
34 import threading | 34 import threading |
35 import time | 35 import time |
36 import unicodedata | |
37 import weakref | 36 import weakref |
38 | 37 |
39 from third_party import colorama | 38 from third_party import colorama |
40 from third_party.depot_tools import fix_encoding | 39 from third_party.depot_tools import fix_encoding |
41 from third_party.depot_tools import subcommand | 40 from third_party.depot_tools import subcommand |
42 | 41 |
| 42 from utils import file_path |
43 from utils import tools | 43 from utils import tools |
44 | 44 |
45 ## OS-specific imports | 45 ## OS-specific imports |
46 | 46 |
47 if sys.platform == 'win32': | 47 if sys.platform == 'win32': |
48 from ctypes.wintypes import byref, create_unicode_buffer, c_int, c_wchar_p | 48 from ctypes.wintypes import byref, c_int, c_wchar_p |
49 from ctypes.wintypes import windll, FormatError # pylint: disable=E0611 | 49 from ctypes.wintypes import windll # pylint: disable=E0611 |
50 from ctypes.wintypes import GetLastError # pylint: disable=E0611 | |
51 elif sys.platform == 'darwin': | |
52 import Carbon.File # pylint: disable=F0401 | |
53 import MacOS # pylint: disable=F0401 | |
54 | 50 |
55 | 51 |
56 __version__ = '0.1' | 52 __version__ = '0.1' |
57 | 53 |
58 | 54 |
59 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | 55 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
60 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) | 56 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) |
61 | 57 |
62 | 58 |
63 class TracingFailure(Exception): | 59 class TracingFailure(Exception): |
(...skipping 16 matching lines...) Expand all Loading... |
80 if self.line: | 76 if self.line: |
81 out += '\n%s' % self.line | 77 out += '\n%s' % self.line |
82 if self.extra: | 78 if self.extra: |
83 out += '\n' + ', '.join(map(str, filter(None, self.extra))) | 79 out += '\n' + ', '.join(map(str, filter(None, self.extra))) |
84 return out | 80 return out |
85 | 81 |
86 | 82 |
87 ## OS-specific functions | 83 ## OS-specific functions |
88 | 84 |
89 if sys.platform == 'win32': | 85 if sys.platform == 'win32': |
def QueryDosDevice(drive_letter):
  """Returns the Windows 'native' path for a DOS drive letter.

  Arguments:
  - drive_letter: unicode string made of one ASCII letter followed by ':'.

  Returns the content of the buffer filled by QueryDosDeviceW().

  Raises WindowsError when QueryDosDeviceW() returns 0 and GetLastError()
  reports a non-zero error code.
  """
  assert re.match(r'^[a-zA-Z]:$', drive_letter), drive_letter
  assert isinstance(drive_letter, unicode)
  # Guesswork. QueryDosDeviceW never returns the required number of bytes.
  chars = 1024
  p = create_unicode_buffer(chars)
  if 0 == windll.kernel32.QueryDosDeviceW(drive_letter, p, chars):
    err = GetLastError()
    if err:
      # pylint: disable=E0602
      msg = u'QueryDosDevice(%s): %s (%d)' % (
          drive_letter, FormatError(err), err)
      # The message is encoded since it may contain non-ASCII characters.
      raise WindowsError(err, msg.encode('utf-8'))
  return p.value
106 | |
107 | |
def GetShortPathName(long_path):
  """Returns the Windows short path equivalent for a 'long' path.

  Arguments:
  - long_path: unicode path to convert.

  Returns the short path, or None when the call fails while GetLastError()
  reports 0. Raises WindowsError when GetLastError() reports an error.
  """
  assert isinstance(long_path, unicode), repr(long_path)
  # The '\\?\' prefix on an absolute path lifts the MAX_PATH (260) limit.
  prefix = '\\\\?\\'
  if os.path.isabs(long_path) and not long_path.startswith(prefix):
    long_path = prefix + long_path

  api = windll.kernel32.GetShortPathNameW
  # The first call only queries the required buffer size.
  size = api(long_path, None, 0)
  if size:
    buf = create_unicode_buffer(size)
    if api(long_path, buf, size):
      return buf.value

  code = GetLastError()
  if not code:
    return None
  # pylint: disable=E0602
  msg = u'GetShortPathName(%s): %s (%d)' % (
      long_path, FormatError(code), code)
  raise WindowsError(code, msg.encode('utf-8'))
127 | |
128 | |
def GetLongPathName(short_path):
  """Returns the Windows long path equivalent for a 'short' path.

  Arguments:
  - short_path: unicode path to convert.

  Returns the long path, or None when the call fails while GetLastError()
  reports 0. Raises WindowsError when GetLastError() reports an error.
  """
  assert isinstance(short_path, unicode)
  # The '\\?\' prefix on an absolute path lifts the MAX_PATH (260) limit.
  prefix = '\\\\?\\'
  if os.path.isabs(short_path) and not short_path.startswith(prefix):
    short_path = prefix + short_path

  api = windll.kernel32.GetLongPathNameW
  # The first call only queries the required buffer size.
  size = api(short_path, None, 0)
  if size:
    buf = create_unicode_buffer(size)
    if api(short_path, buf, size):
      return buf.value

  code = GetLastError()
  if not code:
    return None
  # pylint: disable=E0602
  msg = u'GetLongPathName(%s): %s (%d)' % (
      short_path, FormatError(code), code)
  raise WindowsError(code, msg.encode('utf-8'))
148 | |
149 | |
def get_current_encoding():
  """Returns the process' 'ANSI' code page formatted as 'cp<number>'."""
  code_page = int(windll.kernel32.GetACP())
  return 'cp%d' % code_page
153 | 89 |
154 | 90 |
class DosDriveMap(object):
  r"""Maps NT device paths (\Device\HarddiskVolumeN) to DOS drive letters."""
  # One cache shared by every instance; filled by the first constructor call.
  _MAPPING = {}

  def __init__(self):
    """Fills the shared cache the first time an instance is created."""
    if self._MAPPING:
      return

    # This is related to UNC resolver on windows. Ignore that.
    self._MAPPING[u'\\Device\\Mup'] = None
    self._MAPPING[u'\\SystemRoot'] = os.environ[u'SystemRoot']

    for code in xrange(ord('C'), ord('Z') + 1):
      drive = u'%s:' % chr(code)
      try:
        device = QueryDosDevice(drive)
      except WindowsError:  # pylint: disable=E0602
        # This drive letter could not be queried; skip it.
        continue
      if device not in self._MAPPING:
        self._MAPPING[device] = drive
        continue
      # The first drive letter found for a device is kept.
      logging.warn(
          ('Two drives: \'%s\' and \'%s\', are mapped to the same disk'
           '. Drive letters are a user-mode concept and the kernel '
           'traces only have NT path, so all accesses will be '
           'associated with the first drive letter, independent of the '
           'actual letter used by the code') % (
             self._MAPPING[device], drive))

  def to_win32(self, path):
    """Converts a native NT path to Win32/DOS compatible path.

    Returns None when the device is unknown or is mapped to None. Raises
    ValueError when path does not start with a \\Device\\<name> component.
    """
    parsed = re.match(r'(^\\Device\\[a-zA-Z0-9]+)(\\.*)?$', path)
    if parsed is None:
      raise ValueError(
          'Can\'t convert %s into a Win32 compatible path' % path,
          path)
    device, tail = parsed.groups()
    try:
      drive = self._MAPPING[device]
    except KeyError:
      # Unmapped partitions may be accessed by windows for the
      # fun of it while the test is running. Discard these.
      return None
    if drive and tail:
      return drive + tail
    return drive
199 | |
200 | |
def isabs(path):
  """Like os.path.isabs() but also accepts a bare drive such as 'X:'."""
  if os.path.isabs(path):
    return True
  # Two characters with ':' as the second one, e.g. 'C:'.
  return len(path) == 2 and path[1] == ':'
204 | |
205 | |
def find_item_native_case(root, item):
  """Gets the native path case of a single item located in root.

  '..' is returned as-is.
  """
  if item != '..':
    native_root = get_native_path_case(root)
    native_full = get_native_path_case(os.path.join(native_root, item))
    item = os.path.basename(native_full)
  return item
213 | |
214 | |
def get_native_path_case(p):
  r"""Returns the native path case for a file.

  On Windows, removes any leading '\\?\'.

  Arguments:
  - p: absolute unicode path using '\' as separator. It may carry an
       alternate data stream suffix, which is returned unmodified.

  When the path cannot be resolved (error 2, 3 or 5), the parent directory is
  resolved recursively and the last component is appended as given.

  Raises ValueError if p is not absolute and OSError when the path cannot be
  resolved at all.
  """
  assert isinstance(p, unicode), repr(p)
  if not isabs(p):
    raise ValueError(
        'get_native_path_case(%r): Require an absolute path' % p, p)

  # Make sure it is normalized to os.path.sep. Do not do it here to keep the
  # function fast
  assert '/' not in p, p
  suffix = ''
  count = p.count(':')
  if count > 1:
    # This means it has an alternate-data stream. There could be 3 ':', since
    # it could be the $DATA datastream of an ADS. Split the whole ADS suffix
    # off and add it back afterward. There is no way to know the native path
    # case of an alternate data stream.
    items = p.split(':')
    p = ':'.join(items[0:2])
    suffix = ''.join(':' + i for i in items[2:])

  # TODO(maruel): Use os.path.normpath?
  if p.endswith('.\\'):
    p = p[:-2]

  # Windows used to have an option to turn on case sensitivity on non Win32
  # subsystem but that's out of scope here and isn't supported anymore.
  # Go figure why GetShortPathName() is needed.
  try:
    out = GetLongPathName(GetShortPathName(p))
  except OSError as e:
    if e.args[0] in (2, 3, 5):
      # The path does not exist. Try to recurse and reconstruct the path.
      base = os.path.dirname(p)
      if base == p:
        # There is no parent left to try, e.g. the root of an unmapped drive.
        # Recursing would never terminate, so report the original error.
        raise
      rest = os.path.basename(p)
      # Add the alternate data stream suffix back on this code path too.
      return os.path.join(get_native_path_case(base), rest) + suffix
    raise
  if out.startswith('\\\\?\\'):
    out = out[4:]
  # Always upper case the first letter since GetLongPathName() will return the
  # drive letter in the case it was given.
  return out[0].upper() + out[1:] + suffix
260 | |
261 | |
def CommandLineToArgvW(command_line):
  """Splits a unicode command line into argv using CommandLineToArgvW()."""
  # http://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
  assert isinstance(command_line, unicode)
  argc = c_int()
  argv = windll.shell32.CommandLineToArgvW(command_line, byref(argc))
  try:
    # View the returned block as an array of argc wide-string pointers.
    array_type = c_wchar_p * argc.value
    return list(array_type.from_address(argv))
  finally:
    # Release the returned block once the strings were copied out.
    windll.kernel32.LocalFree(argv)
272 | 101 |
273 | 102 |
274 elif sys.platform == 'darwin': | |
275 | |
276 | |
277 # On non-windows, keep the stdlib behavior. | |
278 isabs = os.path.isabs | |
279 | |
280 | |
def _native_case(p):
  """Gets the native path case. Warning: this function resolves symlinks.

  When the path is not found, the parent is resolved recursively and the last
  component is appended as given. Other MacOS.Error are converted to OSError.
  """
  try:
    fs_ref, _unused = Carbon.File.FSPathMakeRef(p.encode('utf-8'))
    raw = fs_ref.FSRefMakePath().decode('utf-8')
    # The OSX underlying code uses NFD but python strings are in NFC. This
    # will cause issues with os.listdir() for example. Since the dtrace log
    # *is* in NFC, normalize it here.
    native = unicodedata.normalize('NFC', raw)
    # Keep the trailing separator if the input had one.
    if p.endswith(os.path.sep) and not native.endswith(os.path.sep):
      native += os.path.sep
    return native
  except MacOS.Error as e:
    code = e.args[0]
    if code not in (-43, -120):
      raise OSError(
          code, 'Failed to get native path for %s' % p, p, e.args[1])
    # The path does not exist. Try to recurse and reconstruct the path.
    # -43 means file not found.
    # -120 means directory not found.
    parent, leaf = os.path.split(p)
    return os.path.join(_native_case(parent), leaf)
303 | |
304 | |
def _split_at_symlink_native(base_path, rest):
  """Splits at the first symlink and returns it with its native case.

  Returns the (base, symlink, rest) tuple of split_at_symlink(), with symlink
  replaced by the result of find_item_native_case() when one was found.
  """
  base, symlink, rest = split_at_symlink(base_path, rest)
  if not symlink:
    return base, symlink, rest
  # Directory holding the symlink, used to look up its native case.
  parent = safe_join(base_path, base) if base_path else base
  return base, find_item_native_case(parent, symlink), rest
315 | |
316 | |
def find_item_native_case(root_path, item):
  """Gets the native path case of a single item based at root_path.

  There is no API to get the native path case of symlinks on OSX. So it
  needs to be done the slow way, by listing the directory.

  Returns the first directory entry matching item case-insensitively, None
  when there is none. '..' is returned as-is.
  """
  if item == '..':
    return item

  wanted = item.lower()
  entries = os.listdir(root_path)
  lowered = [entry.lower() for entry in entries]
  if wanted in lowered:
    return entries[lowered.index(wanted)]
  return None
330 | |
331 | |
def get_native_path_case(path):
  """Returns the native path case for an existing file.

  Technically, it's only HFS+ on OSX that is case preserving and
  insensitive. It's the default setting on HFS+ but can be changed.

  Arguments:
  - path: absolute unicode path.

  Symlinks found along the path are kept, not resolved. /dev and anything
  below it is returned as-is.

  Raises ValueError if path is not absolute.
  """
  assert isinstance(path, unicode), repr(path)
  if not isabs(path):
    raise ValueError(
        'get_native_path_case(%r): Require an absolute path' % path, path)
  if path == '/dev' or path.startswith('/dev/'):
    # /dev is not visible from Carbon, causing an exception. Only match the
    # directory itself and its content, not siblings like '/developer'.
    return path

  # Starts assuming there is no symlink along the path.
  resolved = _native_case(path)
  if path.lower() in (resolved.lower(), resolved.lower() + './'):
    # This code path is incredibly faster.
    logging.debug('get_native_path_case(%s) = %s' % (path, resolved))
    return resolved

  # There was a symlink, process it.
  base, symlink, rest = _split_at_symlink_native(None, path)
  assert symlink, (path, base, symlink, rest, resolved)
  prev = base
  base = safe_join(_native_case(base), symlink)
  assert len(base) > len(prev)
  # Consume the remaining of the path, one symlink at a time.
  while rest:
    prev = base
    relbase, symlink, rest = _split_at_symlink_native(base, rest)
    base = safe_join(base, relbase)
    assert len(base) > len(prev), (prev, base, symlink)
    if symlink:
      base = safe_join(base, symlink)
    assert len(base) > len(prev), (prev, base, symlink)
  # Make sure no symlink was resolved.
  assert base.lower() == path.lower(), (base, path)
  logging.debug('get_native_path_case(%s) = %s' % (path, base))
  return base
371 | |
372 | |
373 else: # OSes other than Windows and OSX. | |
374 | |
375 | |
376 # On non-windows, keep the stdlib behavior. | |
377 isabs = os.path.isabs | |
378 | |
379 | |
def find_item_native_case(root, item):
  """Gets the native path case of a single item located in root.

  '..' is returned as-is.
  """
  if item != '..':
    native_root = get_native_path_case(root)
    native_full = get_native_path_case(os.path.join(native_root, item))
    item = os.path.basename(native_full)
  return item
387 | |
388 | |
def get_native_path_case(path):
  """Returns the normalized form of an absolute path.

  On OSes other than OSX and Windows, assume the file system is
  case-sensitive, so the case is never changed.

  TODO(maruel): This is not strictly true. Implement if necessary.

  Raises ValueError if path is not absolute.
  """
  assert isinstance(path, unicode), repr(path)
  if not isabs(path):
    raise ValueError(
        'get_native_path_case(%r): Require an absolute path' % path, path)
  # Give up on cygwin, as GetLongPathName() can't be called.
  # Linux traces tends to not be normalized so use this occasion to normalize
  # it. This function implementation already normalizes the path on the other
  # OS so this needs to be done here to be coherent between OSes.
  normalized = os.path.normpath(path)
  # os.path.normpath() strips the trailing separator; put it back.
  keep_trailing = path.endswith(os.path.sep)
  if keep_trailing and not normalized.endswith(os.path.sep):
    normalized += os.path.sep
  return normalized
409 | |
410 | |
411 if sys.platform != 'win32': # All non-Windows OSes. | |
412 | |
413 | |
def safe_join(*args):
  """Joins path elements like os.path.join() but doesn't restart on an
  absolute element.

  Exactly one separator is placed in front of every element, including the
  first one, so on POSIX:
    os.path.join('foo', '/bar') == '/bar'
    but safe_join('foo', '/bar') == '/foo/bar'.

  Returns '' when called without argument.
  """
  sep = os.path.sep
  joined = ''
  for part in args:
    has_trailing = joined.endswith(sep)
    has_leading = part.startswith(sep)
    if has_trailing and has_leading:
      # Drop the duplicate separator.
      part = part[1:]
    elif not has_trailing and not has_leading:
      # Add the missing separator.
      part = sep + part
    joined += part
  return joined
434 | |
435 | |
def split_at_symlink(base_dir, relfile):
  """Walks relfile one component at a time and cuts it at the first symlink.

  Arguments:
  - base_dir: optional absolute directory relfile is relative to. When not
              set, relfile must be absolute.
  - relfile: the path to scan.

  Returns a tuple (base_path, symlink, rest), with symlink == rest == None if
  no symlink was found; base_path is then relfile itself.
  """
  if base_dir:
    assert relfile
    assert os.path.isabs(base_dir)
    cursor = 0
  else:
    assert os.path.isabs(relfile)
    # Skip the leading separator.
    cursor = 1

  total = len(relfile)
  while True:
    cursor = relfile.find(os.path.sep, cursor)
    if cursor == -1:
      # Last component.
      cursor = total
    head = relfile[:cursor]
    candidate = safe_join(base_dir, head) if base_dir else head
    if os.path.islink(candidate):
      # A symlink!
      base, symlink = os.path.split(head)
      rest = relfile[cursor:]
      logging.debug(
          'split_at_symlink(%s, %s) -> (%s, %s, %s)' %
          (base_dir, relfile, base, symlink, rest))
      return base, symlink, rest
    if cursor == total:
      return relfile, None, None
    cursor += 1
475 | |
476 | 103 |
def gen_blacklist(regexes):
  """Returns a function to be used as a blacklist.

  The returned function takes a string and returns True if any of the regexes
  matches at its start.
  """
  patterns = [re.compile(regex) for regex in regexes]

  def is_blacklisted(path):
    for pattern in patterns:
      if pattern.match(path):
        return True
    return False

  return is_blacklisted
483 | 110 |
484 | 111 |
485 def create_subprocess_thunk(): | 112 def create_subprocess_thunk(): |
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
703 self.nb_files = nb_files | 330 self.nb_files = nb_files |
704 # Can be used as a cache or a default value, depending on context. In | 331 # Can be used as a cache or a default value, depending on context. In |
705 # particular, once self.tainted is True, because the path was replaced | 332 # particular, once self.tainted is True, because the path was replaced |
706 # with a variable, it is not possible to look up the file size. | 333 # with a variable, it is not possible to look up the file size. |
707 self._size = size | 334 self._size = size |
708 # These are cache only. | 335 # These are cache only. |
709 self._real_path = None | 336 self._real_path = None |
710 | 337 |
711 # Check internal consistency. | 338 # Check internal consistency. |
712 assert path, path | 339 assert path, path |
713 assert tainted or bool(root) != bool(isabs(path)), (root, path) | 340 assert tainted or bool(root) != bool(file_path.isabs(path)), (root, path) |
714 assert tainted or ( | 341 assert tainted or ( |
715 not os.path.exists(self.full_path) or | 342 not os.path.exists(self.full_path) or |
716 (self.full_path == get_native_path_case(self.full_path))), ( | 343 (self.full_path == file_path.get_native_path_case(self.full_path))), ( |
717 tainted, self.full_path, get_native_path_case(self.full_path)) | 344 tainted, |
| 345 self.full_path, |
| 346 file_path.get_native_path_case(self.full_path)) |
718 | 347 |
@property
def existent(self):
  """Whether self.size is anything other than -1."""
  missing = self.size == -1
  return not missing
722 | 351 |
@property
def full_path(self):
  """self.path joined onto self.root when a root is set, else self.path."""
  if not self.root:
    return self.path
  return os.path.join(self.root, self.path)
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
769 # No need to clone, returns ourself. | 398 # No need to clone, returns ourself. |
770 return self | 399 return self |
771 | 400 |
def strip_root(self, root):
  """Returns a clone of itself with 'root' stripped off.

  Note that the file is kept if it is either accessible from a symlinked
  path that was used to access the file or through the real path.

  Returns None when neither self.full_path nor self.real_path starts with
  root.
  """
  # Check internal consistency.
  assert (
      self.tainted or
      (file_path.isabs(root) and root.endswith(os.path.sep))), root
  if self.full_path.startswith(root):
    path = self.full_path
  elif self.real_path.startswith(root):
    # Resolving the symlinks is only tried *after* trying without resolving
    # them.
    path = self.real_path
  else:
    return None
  return self._clone(root, path[len(root):], self.tainted)
789 | 420 |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
891 return { | 522 return { |
892 'children': [c.flatten() for c in self.children], | 523 'children': [c.flatten() for c in self.children], |
893 'command': self.command, | 524 'command': self.command, |
894 'executable': self.executable, | 525 'executable': self.executable, |
895 'files': [f.flatten() for f in self.files], | 526 'files': [f.flatten() for f in self.files], |
896 'initial_cwd': self.initial_cwd, | 527 'initial_cwd': self.initial_cwd, |
897 'pid': self.pid, | 528 'pid': self.pid, |
898 } | 529 } |
899 | 530 |
900 def strip_root(self, root): | 531 def strip_root(self, root): |
901 assert isabs(root) and root.endswith(os.path.sep), root | 532 assert file_path.isabs(root) and root.endswith(os.path.sep), root |
902 # Loads the files after since they are constructed as objects. | 533 # Loads the files after since they are constructed as objects. |
903 out = self.__class__( | 534 out = self.__class__( |
904 self.pid, | 535 self.pid, |
905 filter(None, (f.strip_root(root) for f in self.files)), | 536 filter(None, (f.strip_root(root) for f in self.files)), |
906 self.executable, | 537 self.executable, |
907 self.command, | 538 self.command, |
908 self.initial_cwd, | 539 self.initial_cwd, |
909 [c.strip_root(root) for c in self.children]) | 540 [c.strip_root(root) for c in self.children]) |
910 logging.debug( | 541 logging.debug( |
911 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) | 542 'strip_root(%s) %d -> %d' % (root, len(self.files), len(out.files))) |
(...skipping 27 matching lines...) Expand all Loading... |
939 | 570 |
def strip_root(self, root):
  """Returns a clone with all the files outside the directory |root| removed
  and converts all the path to be relative paths.

  It keeps files accessible through the |root| directory or that have been
  accessed through any symlink which points to the same directory.
  """
  # Resolve any symlink
  real_root = os.path.realpath(root)
  native_root = file_path.get_native_path_case(real_root)
  # Make sure the root ends with exactly one separator.
  root = native_root.rstrip(os.path.sep) + os.path.sep
  logging.debug('strip_root(%s)' % root)
  stripped = self.process.strip_root(root)
  return Results(stripped)
952 | 584 |
953 | 585 |
954 class ApiBase(object): | 586 class ApiBase(object): |
955 """OS-agnostic API to trace a process and its children.""" | 587 """OS-agnostic API to trace a process and its children.""" |
956 class Context(object): | 588 class Context(object): |
957 """Processes one log line at a time and keeps the list of traced processes. | 589 """Processes one log line at a time and keeps the list of traced processes. |
958 | 590 |
959 The parsing is complicated by the fact that logs are traced out of order for | 591 The parsing is complicated by the fact that logs are traced out of order for |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
995 Converts late-bound strings. | 627 Converts late-bound strings. |
996 """ | 628 """ |
997 if not x: | 629 if not x: |
998 # Do not convert None instance to 'None'. | 630 # Do not convert None instance to 'None'. |
999 return x | 631 return x |
1000 x = render(x) | 632 x = render(x) |
1001 if os.path.isabs(x): | 633 if os.path.isabs(x): |
1002 # If the path is not absolute, which tends to happen occasionally on | 634 # If the path is not absolute, which tends to happen occasionally on |
1003 # Windows, it is not possible to get the native path case so ignore | 635 # Windows, it is not possible to get the native path case so ignore |
1004 # that trace. It mostly happens for 'executable' value. | 636 # that trace. It mostly happens for 'executable' value. |
1005 x = get_native_path_case(x) | 637 x = file_path.get_native_path_case(x) |
1006 return x | 638 return x |
1007 | 639 |
1008 def fix_and_blacklist_path(x, m): | 640 def fix_and_blacklist_path(x, m): |
1009 """Receives a tuple (filepath, mode) and processes filepath.""" | 641 """Receives a tuple (filepath, mode) and processes filepath.""" |
1010 x = fix_path(x) | 642 x = fix_path(x) |
1011 if not x: | 643 if not x: |
1012 return | 644 return |
1013 # The blacklist needs to be reapplied, since path casing could | 645 # The blacklist needs to be reapplied, since path casing could |
1014 # influence blacklisting. | 646 # influence blacklisting. |
1015 if self._blacklist(x): | 647 if self._blacklist(x): |
(...skipping 1736 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
def __init__(self, *args):
  """Forwards all the arguments to the parent class constructor and adds an
  empty file_objects dict.
  """
  super(LogmanTrace.Context.Process, self).__init__(*args)
  # Handle file objects that succeeded.
  self.file_objects = {}
2756 | 2388 |
2757 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd): | 2389 def __init__(self, blacklist, thunk_pid, trace_name, thunk_cmd): |
2758 logging.info( | 2390 logging.info( |
2759 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name, | 2391 '%s(%d, %s, %s)', self.__class__.__name__, thunk_pid, trace_name, |
2760 thunk_cmd) | 2392 thunk_cmd) |
2761 super(LogmanTrace.Context, self).__init__(blacklist) | 2393 super(LogmanTrace.Context, self).__init__(blacklist) |
2762 self._drive_map = DosDriveMap() | 2394 self._drive_map = file_path.DosDriveMap() |
2763 # Threads mapping to the corresponding process id. | 2395 # Threads mapping to the corresponding process id. |
2764 self._threads_active = {} | 2396 self._threads_active = {} |
2765 # Process ID of the tracer, e.g. the temporary script created by | 2397 # Process ID of the tracer, e.g. the temporary script created by |
2766 # create_subprocess_thunk(). This is tricky because the process id may | 2398 # create_subprocess_thunk(). This is tricky because the process id may |
2767 # have been reused. | 2399 # have been reused. |
2768 self._thunk_pid = thunk_pid | 2400 self._thunk_pid = thunk_pid |
2769 self._thunk_cmd = thunk_cmd | 2401 self._thunk_cmd = thunk_cmd |
2770 self._trace_name = trace_name | 2402 self._trace_name = trace_name |
2771 self._line_number = 0 | 2403 self._line_number = 0 |
2772 self._thunk_process = None | 2404 self._thunk_process = None |
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2990 # anything else too. If it happens that command[0] ends with executable, | 2622 # anything else too. If it happens that command[0] ends with executable, |
2991 # use it, otherwise defaults to the base name. | 2623 # use it, otherwise defaults to the base name. |
2992 cmd0 = proc.command[0].lower() | 2624 cmd0 = proc.command[0].lower() |
2993 if not cmd0.endswith('.exe'): | 2625 if not cmd0.endswith('.exe'): |
2994 # TODO(maruel): That's not strictly true either. | 2626 # TODO(maruel): That's not strictly true either. |
2995 cmd0 += '.exe' | 2627 cmd0 += '.exe' |
2996 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0): | 2628 if cmd0.endswith(proc.executable) and os.path.isfile(cmd0): |
2997 # Fix the path. | 2629 # Fix the path. |
2998 cmd0 = cmd0.replace('/', os.path.sep) | 2630 cmd0 = cmd0.replace('/', os.path.sep) |
2999 cmd0 = os.path.normpath(cmd0) | 2631 cmd0 = os.path.normpath(cmd0) |
3000 proc.executable = get_native_path_case(cmd0) | 2632 proc.executable = file_path.get_native_path_case(cmd0) |
3001 logging.info( | 2633 logging.info( |
3002 'New child: %s -> %d %s' % (ppid, pid, proc.executable)) | 2634 'New child: %s -> %d %s' % (ppid, pid, proc.executable)) |
3003 | 2635 |
def handle_Thread_End(self, line):
  """Forgets an active thread. Has the same parameters as Thread_Start.

  The thread id is read as an hexadecimal number from line[self.TID]. An
  unknown thread id is silently ignored.
  """
  thread_id = int(line[self.TID], 16)
  if thread_id in self._threads_active:
    del self._threads_active[thread_id]
3008 | 2640 |
3009 def handle_Thread_Start(self, line): | 2641 def handle_Thread_Start(self, line): |
3010 """Handles a new thread created. | 2642 """Handles a new thread created. |
(...skipping 523 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3534 - root_dir: Optional base directory that shouldn't be search further. | 3166 - root_dir: Optional base directory that shouldn't be search further. |
3535 - files: list of Results.File instances. | 3167 - files: list of Results.File instances. |
3536 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'. | 3168 - blacklist: lambda to reject unneeded files, for example r'.+\.pyc'. |
3537 """ | 3169 """ |
3538 logging.info( | 3170 logging.info( |
3539 'extract_directories(%s, %d files, ...)' % (root_dir, len(files))) | 3171 'extract_directories(%s, %d files, ...)' % (root_dir, len(files))) |
3540 assert not (root_dir or '').endswith(os.path.sep), root_dir | 3172 assert not (root_dir or '').endswith(os.path.sep), root_dir |
3541 # It is important for root_dir to not be a symlinked path, make sure to call | 3173 # It is important for root_dir to not be a symlinked path, make sure to call |
3542 # os.path.realpath() as needed. | 3174 # os.path.realpath() as needed. |
3543 assert not root_dir or ( | 3175 assert not root_dir or ( |
3544 os.path.realpath(get_native_path_case(root_dir)) == root_dir) | 3176 os.path.realpath(file_path.get_native_path_case(root_dir)) == root_dir) |
3545 assert not any(isinstance(f, Results.Directory) for f in files) | 3177 assert not any(isinstance(f, Results.Directory) for f in files) |
3546 # Remove non existent files. | 3178 # Remove non existent files. |
3547 files = [f for f in files if f.existent] | 3179 files = [f for f in files if f.existent] |
3548 if not files: | 3180 if not files: |
3549 return files | 3181 return files |
3550 # All files must share the same root, which can be None. | 3182 # All files must share the same root, which can be None. |
3551 assert len(set(f.root for f in files)) == 1, set(f.root for f in files) | 3183 assert len(set(f.root for f in files)) == 1, set(f.root for f in files) |
3552 | 3184 |
3553 # Creates a {directory: {filename: File}} mapping, up to root. | 3185 # Creates a {directory: {filename: File}} mapping, up to root. |
3554 buckets = {} | 3186 buckets = {} |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3666 help='Only reads one of the trace. Defaults to reading all traces') | 3298 help='Only reads one of the trace. Defaults to reading all traces') |
3667 parser.add_option( | 3299 parser.add_option( |
3668 '-j', '--json', action='store_true', | 3300 '-j', '--json', action='store_true', |
3669 help='Outputs raw result data as json') | 3301 help='Outputs raw result data as json') |
3670 parser.add_option( | 3302 parser.add_option( |
3671 '--trace-blacklist', action='append', default=[], | 3303 '--trace-blacklist', action='append', default=[], |
3672 help='List of regexp to use as blacklist filter') | 3304 help='List of regexp to use as blacklist filter') |
3673 options, args = parser.parse_args(args) | 3305 options, args = parser.parse_args(args) |
3674 | 3306 |
3675 if options.root_dir: | 3307 if options.root_dir: |
3676 options.root_dir = get_native_path_case( | 3308 options.root_dir = file_path.get_native_path_case( |
3677 unicode(os.path.abspath(options.root_dir))) | 3309 unicode(os.path.abspath(options.root_dir))) |
3678 | 3310 |
3679 variables = dict(options.variables) | 3311 variables = dict(options.variables) |
3680 api = get_api() | 3312 api = get_api() |
3681 blacklist = gen_blacklist(options.trace_blacklist) | 3313 blacklist = gen_blacklist(options.trace_blacklist) |
3682 data = api.parse_log(options.log, blacklist, options.trace_name) | 3314 data = api.parse_log(options.log, blacklist, options.trace_name) |
3683 # Process each trace. | 3315 # Process each trace. |
3684 output_as_json = [] | 3316 output_as_json = [] |
3685 try: | 3317 try: |
3686 for item in data: | 3318 for item in data: |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3761 sys.stderr.write(str(e)) | 3393 sys.stderr.write(str(e)) |
3762 sys.stderr.write('\n') | 3394 sys.stderr.write('\n') |
3763 return 1 | 3395 return 1 |
3764 | 3396 |
3765 | 3397 |
# Script entry point: only runs when the file is executed directly, not when
# it is imported.
if __name__ == '__main__':
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  # The value returned by main() becomes the process exit code.
  sys.exit(main(sys.argv[1:]))
OLD | NEW |