Index: tools/isolate/trace_inputs.py |
diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py |
index a298c4004e9f1ce2bf0c414380b865dd1804742b..ca8cf7ad83eb587958bfe55ee8d656d309fc9a5e 100755 |
--- a/tools/isolate/trace_inputs.py |
+++ b/tools/isolate/trace_inputs.py |
@@ -11,8 +11,9 @@ The implementation uses OS-specific API. The native Kernel logger and the ETL |
interface is used on Windows. Dtrace is used on OSX. Strace is used otherwise. |
The OS-specific implementation is hidden in an 'API' interface. |
-The list is done in two phases, the first is to do the actual trace and generate |
-an technique-specific log file. Then the log file is parsed to extract the |
+The results are embedded in a Results instance. The tracing is done in two |
+phases, the first is to do the actual trace and generate an |
+implementation-specific log file. Then the log file is parsed to extract the |
information, including the individual child processes and the files accessed |
from the log. |
""" |
@@ -977,7 +978,11 @@ class Dtrace(ApiBase): |
get the absolute path of the 'cwd' dtrace variable from the probe. |
Also, OSX doesn't populate curpsinfo->pr_psargs properly, see |
- https://discussions.apple.com/thread/1980539. |
+ https://discussions.apple.com/thread/1980539. So resort to handling execve() |
+ manually. |
+ |
+ errno is not printed in the log since this implementation currently care only |
MAD
2012/06/28 13:04:57
care only -> only cares
|
+ about files that were successfully opened. |
""" |
IGNORED = ( |
'/.vol', |
@@ -992,27 +997,58 @@ class Dtrace(ApiBase): |
) |
# pylint: disable=C0301 |
+ # |
# To understand the following code, you'll want to take a look at: |
# http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html |
# https://wikis.oracle.com/display/DTrace/Variables |
# http://docs.oracle.com/cd/E19205-01/820-4221/ |
# |
- # The list of valid probes can be retrieved with: |
- # sudo dtrace -l -P syscall | less |
+ # 0. Dump all the valid probes into a text file. It is important, you |
+ # want to redirect into a file and you don't want to constantly 'sudo'. |
+ # $ sudo dtrace -l > probes.txt |
+ # |
+ # 1. Count the number of probes: |
+ # $ wc -l probes.txt |
+ # 81823 # On OSX 10.7, including 1 header line. |
+ # |
+ # 2. List providers, intentionally skipping all the 'syspolicy10925' and the |
+ # likes and skipping the header with NR>1: |
+ # $ awk 'NR>1 { print $2 }' probes.txt | sort | uniq | grep -v '[[:digit:]]' |
+ # dtrace |
+ # fbt |
+ # io |
+ # ip |
+ # lockstat |
+ # mach_trap |
+ # proc |
+ # profile |
+ # sched |
+ # syscall |
+ # tcp |
+ # vminfo |
+ # |
+ # 3. List of valid probes: |
+ # $ grep syscall probes.txt | less |
+ # or use dtrace directly: |
+ # $ sudo dtrace -l -P syscall | less |
+ |
D_CODE = """ |
proc:::start /trackedpid[ppid]/ { |
trackedpid[pid] = 1; |
current_processes += 1; |
- printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", |
- logindex, ppid, pid, probeprov, probename, execname, |
+ printf("%d %d %s_%s(%d, \\"%s\\", %d)\\n", |
+ logindex, pid, probeprov, probename, |
+ ppid, |
+ execname, |
current_processes); |
logindex++; |
} |
proc:::exit /trackedpid[pid] && current_processes == 1/ { |
+ /* Last process is exiting. */ |
trackedpid[pid] = 0; |
current_processes -= 1; |
- printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", |
- logindex, ppid, pid, probeprov, probename, execname, |
+ printf("%d %d %s_%s(%d)\\n", |
+ logindex, pid, probeprov, probename, |
current_processes); |
logindex++; |
exit(0); |
@@ -1020,17 +1056,19 @@ class Dtrace(ApiBase): |
proc:::exit /trackedpid[pid]/ { |
trackedpid[pid] = 0; |
current_processes -= 1; |
- printf("%d %d:%d %s_%s(\\"%s\\", %d) = 0\\n", |
- logindex, ppid, pid, probeprov, probename, execname, |
+ printf("%d %d %s_%s(%d)\\n", |
+ logindex, pid, probeprov, probename, |
current_processes); |
logindex++; |
} |
/* Finally what we care about! */ |
- syscall::exec*:entry /trackedpid[pid]/ { |
+ syscall::exec*:entry /trackedpid[pid] == 1/ { |
self->e_arg0 = copyinstr(arg0); |
/* Incrementally probe for a NULL in the argv parameter of execve() to |
* figure out argc. */ |
+ /* TODO(maruel): Skip the remaining copyin() when a NULL pointer was |
+ * found. */ |
self->argc = 0; |
self->argv = (user_addr_t*)copyin( |
arg1, sizeof(user_addr_t) * (self->argc + 1)); |
@@ -1052,6 +1090,15 @@ class Dtrace(ApiBase): |
self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
self->argv = (user_addr_t*)copyin( |
arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
MAD
2012/06/28 13:04:57
I guess you can't use a loop for this... A bit sad
|
+ self->argc = self->argv[self->argc] ? (self->argc + 1) : self->argc; |
+ self->argv = (user_addr_t*)copyin( |
+ arg1, sizeof(user_addr_t) * (self->argc + 1)); |
/* Copy the inputs strings since there is no guarantee they'll be |
* present after the call completed. */ |
@@ -1066,7 +1113,7 @@ class Dtrace(ApiBase): |
self->args[8] = (self->argc > 8) ? copyinstr(self->argv[8]) : ""; |
self->args[9] = (self->argc > 9) ? copyinstr(self->argv[9]) : ""; |
} |
- syscall::exec*: /trackedpid[pid] && errno == 0/ { |
+ syscall::exec*:return /trackedpid[pid] == 1 && errno == 0/ { |
/* We need to join strings here, as using multiple printf() would cause |
* tearing when multiple threads/processes are traced. */ |
this->args = ""; |
@@ -1112,15 +1159,14 @@ class Dtrace(ApiBase): |
/* Prints self->argc to permits verifying the internal consistency since |
* this code is quite fishy. */ |
- printf("%d %d:%d %s(\\"%s\\", [%d%s]) = %d\\n", |
- logindex, ppid, pid, probefunc, |
+ printf("%d %d %s(\\"%s\\", [%d%s])\\n", |
+ logindex, pid, probefunc, |
self->e_arg0, |
self->argc, |
- this->args, |
- errno); |
+ this->args); |
logindex++; |
- |
- /* TODO(maruel): Clean up memory |
+ } |
+ syscall::exec*:return /trackedpid[pid] == 1/ { |
self->e_arg0 = 0; |
self->argc = 0; |
self->args[0] = 0; |
@@ -1133,46 +1179,89 @@ class Dtrace(ApiBase): |
self->args[7] = 0; |
self->args[8] = 0; |
self->args[9] = 0; |
- */ |
} |
- syscall::open*:entry /trackedpid[pid]/ { |
- self->arg0 = arg0; |
- self->arg1 = arg1; |
- self->arg2 = arg2; |
+ /* These are a good learning experience, since it traces a lot of things |
MAD
2012/06/28 13:04:57
These are -> This is
|
+ * related to the process and child processes. |
+ * Warning: it generates a gigantic log. For example, tracing |
+ * "data/trace_inputs/child1.py --child" generates a 2mb log and takes |
+ * several minutes to execute. |
+ */ |
+ /* |
+ mach_trap::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
+ printf("%d %d %s_%s() = %d\\n", |
+ logindex, pid, probeprov, probefunc, errno); |
+ logindex++; |
} |
- syscall::open*:return /trackedpid[pid] && errno == 0/ { |
- printf("%d %d:%d %s(\\"%s\\", %d, %d) = %d\\n", |
- logindex, ppid, pid, probefunc, copyinstr(self->arg0), |
- self->arg1, self->arg2, errno); |
+ proc::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
+ printf("%d %d %s_%s() = %d\\n", |
+ logindex, pid, probeprov, probefunc, errno); |
logindex++; |
- self->arg0 = 0; |
- self->arg1 = 0; |
- self->arg2 = 0; |
} |
+ sched::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
+ printf("%d %d %s_%s() = %d\\n", |
+ logindex, pid, probeprov, probefunc, errno); |
+ logindex++; |
+ } |
+ syscall::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
+ printf("%d %d %s_%s() = %d\\n", |
+ logindex, pid, probeprov, probefunc, errno); |
+ logindex++; |
+ } |
+ vminfo::: /trackedpid[pid] == 1 || trackedpid[ppid]/ { |
+ printf("%d %d %s_%s() = %d\\n", |
+ logindex, pid, probeprov, probefunc, errno); |
+ logindex++; |
+ } |
+ */ |
- syscall::rename:entry /trackedpid[pid]/ { |
- self->arg0 = arg0; |
- self->arg1 = arg1; |
+ syscall::open*:entry /trackedpid[pid] == 1/ { |
+ self->open_arg0 = arg0; |
+ self->open_arg1 = arg1; |
+ self->open_arg2 = arg2; |
} |
- syscall::rename:return /trackedpid[pid]/ { |
- printf("%d %d:%d %s(\\"%s\\", \\"%s\\") = %d\\n", |
- logindex, ppid, pid, probefunc, copyinstr(self->arg0), |
- copyinstr(self->arg1), errno); |
+ syscall::open*:return /trackedpid[pid] == 1 && errno == 0/ { |
+ printf("%d %d %s(\\"%s\\", 0x%x, 0x%x)\\n", |
+ logindex, pid, probefunc, |
+ copyinstr(self->open_arg0), |
+ self->open_arg1, |
+ self->open_arg2); |
logindex++; |
- self->arg0 = 0; |
- self->arg1 = 0; |
+ } |
+ syscall::open*:return /trackedpid[pid] == 1/ { |
+ self->open_arg0 = 0; |
+ self->open_arg1 = 0; |
+ self->open_arg2 = 0; |
+ } |
+ |
+ syscall::rename:entry /trackedpid[pid] == 1/ { |
+ self->rename_arg0 = arg0; |
+ self->rename_arg1 = arg1; |
+ } |
+ syscall::rename:return /trackedpid[pid] == 1 && errno == 0/ { |
+ printf("%d %d %s(\\"%s\\", \\"%s\\")\\n", |
+ logindex, pid, probefunc, |
+ copyinstr(self->rename_arg0), |
+ copyinstr(self->rename_arg1)); |
+ logindex++; |
+ } |
+ syscall::rename:return /trackedpid[pid] == 1/ { |
+ self->rename_arg0 = 0; |
+ self->rename_arg1 = 0; |
} |
/* Track chdir, it's painful because it is only receiving relative path */ |
- syscall::chdir:entry /trackedpid[pid]/ { |
- self->arg0 = arg0; |
+ syscall::chdir:entry /trackedpid[pid] == 1/ { |
+ self->chdir_arg0 = arg0; |
} |
- syscall::chdir:return /trackedpid[pid] && errno == 0/ { |
- printf("%d %d:%d %s(\\"%s\\") = %d\\n", |
- logindex, ppid, pid, probefunc, copyinstr(self->arg0), errno); |
+ syscall::chdir:return /trackedpid[pid] == 1 && errno == 0/ { |
+ printf("%d %d %s(\\"%s\\")\\n", |
+ logindex, pid, probefunc, |
+ copyinstr(self->chdir_arg0)); |
logindex++; |
- self->arg0 = 0; |
+ } |
+ syscall::chdir:return /trackedpid[pid] == 1/ { |
+ self->chdir_arg0 = 0; |
} |
/* TODO(maruel): *stat* functions and friends |
syscall::access:return, |
@@ -1203,12 +1292,14 @@ class Dtrace(ApiBase): |
"""Setups the D code to implement child process tracking. |
Injects the pid and the initial cwd into the trace header for context. |
- The reason is that the child process is already running at that point so: |
+ The reason is that the child process trace_child_process.py is already |
+ running at that point so: |
- no proc_start() is logged for it. |
- there is no way to figure out the absolute path of cwd in kernel on OSX |
Since the child process is already started, initialize current_processes to |
1. |
+ |
""" |
pid = str(pid) |
cwd = os.path.realpath(cwd).replace('\\', '\\\\').replace('%', '%%') |
@@ -1216,21 +1307,28 @@ class Dtrace(ApiBase): |
'dtrace:::BEGIN {\n' |
' current_processes = 1;\n' |
' logindex = 0;\n' |
- ' trackedpid[' + pid + '] = 1;\n' |
- ' printf("%d %d:%d %s_%s(\\"' + cwd + '\\") = 0\\n",\n' |
- ' logindex, ppid, ' + pid + ', probeprov, probename);\n' |
+ # trackedpid is an associative array where its value can be 0, 1 or 2. |
+ # 0 is for untracked processes and is the default value for items not |
+ # in the associative array. |
+ # 1 is for tracked processes. |
+ # 2 is for trace_child_process.py only. It is not tracked itself but |
+ # all its decendants are. |
+ ' trackedpid[' + pid + '] = 2;\n' |
+ ' printf("%d %d %s_%s(\\"' + cwd + '\\")\\n",\n' |
+ ' logindex, ' + pid + ', probeprov, probename);\n' |
' logindex++;\n' |
'}\n') + cls.D_CODE |
class Context(ApiBase.Context): |
- # This is the most common format. index pid function(args) = result |
- RE_HEADER = re.compile(r'^\d+ (\d+):(\d+) ([a-zA-Z_\-]+)\((.*?)\) = (.+)$') |
+ # This is the most common format. index pid function(args) |
+ RE_HEADER = re.compile(r'^\d+ (\d+) ([a-zA-Z_\-]+)\((.*?)\)$') |
# Arguments parsing. |
RE_DTRACE_BEGIN = re.compile(r'^\"(.+?)\"$') |
RE_CHDIR = re.compile(r'^\"(.+?)\"$') |
RE_EXECVE = re.compile(r'^\"(.+?)\", \[(\d+), (.+)\]$') |
- RE_OPEN = re.compile(r'^\"(.+?)\", (\d+), (-?\d+)$') |
+ RE_OPEN = re.compile(r'^\"(.+?)\", (0x[0-9a-z]+), (0x[0-9a-z]+)$') |
+ RE_PROC_START = re.compile(r'^(\d+), \"(.+?)\", (\d+)$') |
RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$') |
O_DIRECTORY = 0x100000 |
@@ -1251,14 +1349,11 @@ class Dtrace(ApiBase): |
assert match, line |
fn = getattr( |
self, |
- 'handle_%s' % match.group(3).replace('-', '_'), |
+ 'handle_%s' % match.group(2).replace('-', '_'), |
self._handle_ignored) |
return fn( |
int(match.group(1)), |
- int(match.group(2)), |
- match.group(3), |
- match.group(4), |
- match.group(5)) |
+ match.group(3)) |
def to_results(self): |
"""Uses self._initial_pid to determine the initial process.""" |
@@ -1267,20 +1362,21 @@ class Dtrace(ApiBase): |
sorted(self.processes), sorted(p.pid for p in process.all)) |
return Results(process) |
- def handle_dtrace_BEGIN(self, _ppid, pid, _function, args, _result): |
+ def handle_dtrace_BEGIN(self, pid, args): |
assert not self._tracer_pid and not self._initial_pid |
self._tracer_pid = pid |
self._initial_cwd = self.RE_DTRACE_BEGIN.match(args).group(1) |
- def handle_proc_start(self, ppid, pid, _function, _args, result): |
+ def handle_proc_start(self, pid, args): |
"""Transfers cwd. |
The dtrace script already takes care of only tracing the processes that |
are child of the traced processes so there is no need to verify the |
process hierarchy. |
""" |
- assert result == '0' |
assert pid not in self.processes |
+ match = self.RE_PROC_START.match(args) |
+ ppid = int(match.group(1)) |
assert (ppid == self._tracer_pid) != bool(self._initial_pid) |
if not self._initial_pid: |
self._initial_pid = pid |
@@ -1293,13 +1389,13 @@ class Dtrace(ApiBase): |
proc.cwd = cwd |
logging.debug('New child: %s -> %d' % (ppid, pid)) |
- def handle_proc_exit(self, _ppid, pid, _function, _args, _result): |
+ def handle_proc_exit(self, pid, _args): |
"""Removes cwd.""" |
if pid != self._tracer_pid: |
# self._tracer_pid is not traced itself. |
self.processes[pid].cwd = None |
- def handle_execve(self, _ppid, pid, _function, args, _result): |
+ def handle_execve(self, pid, args): |
"""Sets the process' executable. |
TODO(maruel): Read command line arguments. See |
@@ -1316,43 +1412,38 @@ class Dtrace(ApiBase): |
proc.command = process_quoted_arguments(match.group(3)) |
assert int(match.group(2)) == len(proc.command), args |
- def handle_chdir(self, _ppid, pid, _function, args, result): |
+ def handle_chdir(self, pid, args): |
"""Updates cwd.""" |
assert self._tracer_pid |
assert pid in self.processes |
- if result.startswith('0'): |
- cwd = self.RE_CHDIR.match(args).group(1) |
- if not cwd.startswith('/'): |
- cwd2 = os.path.join(self.processes[pid].cwd, cwd) |
- logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) |
- else: |
- logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) |
- cwd2 = cwd |
- self.processes[pid].cwd = cwd2 |
+ cwd = self.RE_CHDIR.match(args).group(1) |
+ if not cwd.startswith('/'): |
+ cwd2 = os.path.join(self.processes[pid].cwd, cwd) |
+ logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2)) |
else: |
- assert False, 'Unexecpected fail: %s' % result |
+ logging.debug('handle_chdir(%d, %s)' % (pid, cwd)) |
+ cwd2 = cwd |
+ self.processes[pid].cwd = cwd2 |
- def handle_open_nocancel(self, ppid, pid, function, args, result): |
- return self.handle_open(ppid, pid, function, args, result) |
+ def handle_open_nocancel(self, pid, args): |
+ return self.handle_open(pid, args) |
- def handle_open(self, _ppid, pid, function, args, result): |
+ def handle_open(self, pid, args): |
match = self.RE_OPEN.match(args) |
- assert match, (pid, function, args, result) |
- args = match.groups() |
- flag = int(args[1]) |
+ assert match, (pid, args) |
+ flag = int(match.group(2), 16) |
if self.O_DIRECTORY & flag == self.O_DIRECTORY: |
# Ignore directories. |
return |
- self._handle_file(pid, args[0], result) |
+ self._handle_file(pid, match.group(1)) |
- def handle_rename(self, _ppid, pid, _function, args, result): |
- args = self.RE_RENAME.match(args).groups() |
- self._handle_file(pid, args[0], result) |
- self._handle_file(pid, args[1], result) |
+ def handle_rename(self, pid, args): |
+ match = self.RE_RENAME.match(args) |
+ assert match, (pid, args) |
+ self._handle_file(pid, match.group(1)) |
+ self._handle_file(pid, match.group(2)) |
- def _handle_file(self, pid, filepath, result): |
- if result.startswith(('-1', '2')): |
- return |
+ def _handle_file(self, pid, filepath): |
if not filepath.startswith('/'): |
filepath = os.path.join(self.processes[pid].cwd, filepath) |
# We can get '..' in the path. |
@@ -1364,8 +1455,9 @@ class Dtrace(ApiBase): |
self.processes[pid].add_file(filepath) |
@staticmethod |
- def _handle_ignored(_ppid, pid, function, args, result): |
- logging.debug('%d %s(%s) = %s' % (pid, function, args, result)) |
+ def _handle_ignored(pid, args): |
+ """Is called for all the event traces that are not handled.""" |
+ assert False, '%d ??(%s)' % (pid, args) |
@staticmethod |
def clean_trace(logname): |