Chromium Code Reviews| Index: tools/cygprofile/mergetraces.py |
| diff --git a/tools/cygprofile/mergetraces.py b/tools/cygprofile/mergetraces.py |
| index 1c7627a8dff507408a3bcee8af0a030a45d8f6c3..e6d41095d18738fc42bc573208107eb7e95c3f7e 100755 |
| --- a/tools/cygprofile/mergetraces.py |
| +++ b/tools/cygprofile/mergetraces.py |
| @@ -13,9 +13,7 @@ create a single log that is an ordered trace of calls by both processes. |
| """ |
| import optparse |
| -import os |
| import string |
| -import subprocess |
| import sys |
| def ParseLogLines(lines): |
| @@ -55,7 +53,7 @@ def ParseLogLines(lines): |
| return (call_lines, vm_start, vm_end) |
| def HasDuplicates(calls): |
| - """Funcition is a sanity check to make sure that calls are only logged once. |
| + """Makes sure that calls are only logged once. |
| Args: |
| calls: list of calls logged |
| @@ -63,12 +61,12 @@ def HasDuplicates(calls): |
| Returns: |
| boolean indicating if calls has duplicate calls |
| """ |
| - seen = [] |
| + seen = set([]) |
| for call in calls: |
| if call[3] in seen: |
| - return true |
| - else: |
| - seen.append(call[3]) |
| + return True |
| + seen.add(call[3]) |
| + return False |
| def CheckTimestamps(calls): |
| """Prints warning to stderr if the call timestamps are not in order. |
| @@ -137,6 +135,76 @@ def AddTrace (tracemap, trace): |
| Timestamp(tracemap[call]) > Timestamp(trace_entry)): |
| tracemap[call] = trace_entry |
| +def GroupByProcessAndThreadId(sorted_trace): |
|
pasko
2014/06/05 12:16:36
I still think my function is easier to read. It pa
Philippe
2014/06/05 12:38:52
Right :) I guess I had to see it to believe it. Th
|
| + """Returns an array of traces grouped by pid and tid. |
| + |
| + This is used to make the order of functions not depend on thread-scheduling |
| + which can be greatly impacted when profiling is done with cygprofile. As a |
| + result each thread has its own contiguous segment of code (ordered by |
| + timestamp) and processes also have their code isolated (i.e. not interleaved). |
| + |
| + This works by building a forest internally with three levels: |
| + <PID 1> ..........................<PID N0> |
| + / / |
| + <TID 1:1>...<TID 1:N1> <TID N0:1>...<TID N0:N''> |
| + / |
| + <Trace 1:1:1>..<Trace 1:1:N2> |
| + |
| + The nodes are inserted in the forest by order of occurrence in |sorted_trace| |
| + which is expected to be sorted by timestamp. |
| + |
| + This forest is then flattened and returned as in: |
| + <TID 1:1> <TID 1:2>...<TID 1:N>...<TID N':1> <TID N':2>...<TID N':N''> |
| + """ |
| + # Array of (pid, [(tid, [trace])]). Note that arrays are used (as opposed to |
| + # (un)ordered maps) to preserve order. |
| + trace_arrays_for_processes = [] |
| + |
| + def _GetTraceArray(pid, tid): |
|
pasko
2014/06/05 11:28:49
This code operates on lists, there are no arrays.
Philippe
2014/06/05 12:38:52
Done.
|
| + # TODO(pliard): This may require a cache if the linear search below turns |
| + # out to be too slow. |
| + found_process_index = -1 |
|
pasko
2014/06/05 11:28:49
I am not a Python expert, but it seems more python
Philippe
2014/06/05 12:38:52
Done.
|
| + |
| + for i, (current_pid, current_trace_arrays) in enumerate( |
| + trace_arrays_for_processes): |
| + if current_pid != pid: |
| + continue |
| + |
| + found_process_index = i |
| + for (current_tid, trace_array) in current_trace_arrays: |
| + if current_tid == tid: |
| + return trace_array |
| + break |
| + |
| + process_pair = None |
|
pasko
2014/06/05 11:28:49
I would appreciate a comment here saying something
Philippe
2014/06/05 12:38:52
Done.
|
| + if found_process_index == -1: |
| + process_pair = (pid, []) |
| + trace_arrays_for_processes.append(process_pair) |
| + else: |
| + process_pair = trace_arrays_for_processes[found_process_index] |
| + |
| + (_, trace_arrays) = process_pair |
| + new_array = [] |
| + trace_arrays.append((tid, new_array)) |
| + return new_array |
| + |
| + # Build the forest. |
| + for trace in sorted_trace: |
| + tokens = trace[2].split(':') |
| + (pid, tid) = (int(tokens[0]), int(tokens[1])) |
| + _GetTraceArray(pid, tid).append(trace) |
| + |
| + # Flatten the forest. |
| + flat_trace_array = [] |
| + for (_, current_trace_arrays) in trace_arrays_for_processes: |
| + for (_, trace_array) in current_trace_arrays: |
| + for trace in trace_array: |
| + flat_trace_array.append(trace) |
| + |
| + assert not HasDuplicates(flat_trace_array) |
| + |
| + return flat_trace_array |
| + |
| def main(): |
| """Merge two traces for code in specified library and write to stdout. |
| @@ -151,7 +219,10 @@ def main(): |
| parser.error('expected at least the following args: trace1 trace2') |
| step = 0 |
| + |
| + # Maps function addresses to their corresponding trace entry. |
| tracemap = dict() |
| + |
| for trace_file in args: |
| step += 1 |
| sys.stderr.write(" " + str(step) + "/" + str(len(args)) + |
| @@ -176,9 +247,12 @@ def main(): |
| merged_trace.append(tracemap[call]) |
| merged_trace.sort(key=Timestamp) |
| + grouped_trace = GroupByProcessAndThreadId(merged_trace) |
| + merged_trace = None |
| + |
| print "0-ffffffff r-xp 00000000 xx:00 00000 ./" |
| print "secs\tusecs\tpid:threadid\tfunc" |
| - for call in merged_trace: |
| + for call in grouped_trace: |
| print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t" + |
| hex(call[3])) |