Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 # Use: ../mergetraces.py `ls cyglog.* -Sr` > merged_cyglog | 6 # Use: ../mergetraces.py `ls cyglog.* -Sr` > merged_cyglog |
| 7 | 7 |
| 8 """"Merge multiple logs files from different processes into a single log. | 8 """"Merge multiple logs files from different processes into a single log. |
| 9 | 9 |
| 10 Given two log files of execution traces, merge the traces into a single trace. | 10 Given two log files of execution traces, merge the traces into a single trace. |
| 11 Merging will use timestamps (i.e. the first two columns of logged calls) to | 11 Merging will use timestamps (i.e. the first two columns of logged calls) to |
| 12 create a single log that is an ordered trace of calls by both processes. | 12 create a single log that is an ordered trace of calls by both processes. |
| 13 """ | 13 """ |
| 14 | 14 |
| 15 import optparse | 15 import optparse |
| 16 import os | |
| 17 import string | 16 import string |
| 18 import subprocess | |
| 19 import sys | 17 import sys |
| 20 | 18 |
| 21 def ParseLogLines(lines): | 19 def ParseLogLines(lines): |
| 22 """Parse log file lines. | 20 """Parse log file lines. |
| 23 | 21 |
| 24 Args: | 22 Args: |
| 25 lines: lines from log file produced by profiled run | 23 lines: lines from log file produced by profiled run |
| 26 | 24 |
| 27 Below is an example of a small log file: | 25 Below is an example of a small log file: |
| 28 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so | 26 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 48 vm_end = int (lines[0][dash_index+1:space_index], 16) | 46 vm_end = int (lines[0][dash_index+1:space_index], 16) |
| 49 for line in lines[2:]: | 47 for line in lines[2:]: |
| 50 line = line.strip() | 48 line = line.strip() |
| 51 # print hex (vm_start) | 49 # print hex (vm_start) |
| 52 fields = line.split() | 50 fields = line.split() |
| 53 call_lines.append (fields) | 51 call_lines.append (fields) |
| 54 | 52 |
| 55 return (call_lines, vm_start, vm_end) | 53 return (call_lines, vm_start, vm_end) |
| 56 | 54 |
| 57 def HasDuplicates(calls): | 55 def HasDuplicates(calls): |
| 58 """Funcition is a sanity check to make sure that calls are only logged once. | 56 """Makes sure that calls are only logged once. |
| 59 | 57 |
| 60 Args: | 58 Args: |
| 61 calls: list of calls logged | 59 calls: list of calls logged |
| 62 | 60 |
| 63 Returns: | 61 Returns: |
| 64 boolean indicating if calls has duplicate calls | 62 boolean indicating if calls has duplicate calls |
| 65 """ | 63 """ |
| 66 seen = [] | 64 seen = set([]) |
| 67 for call in calls: | 65 for call in calls: |
| 68 if call[3] in seen: | 66 if call[3] in seen: |
| 69 return true | 67 return True |
| 70 else: | 68 seen.add(call[3]) |
| 71 seen.append(call[3]) | 69 return False |
| 72 | 70 |
| 73 def CheckTimestamps(calls): | 71 def CheckTimestamps(calls): |
| 74 """Prints warning to stderr if the call timestamps are not in order. | 72 """Prints warning to stderr if the call timestamps are not in order. |
| 75 | 73 |
| 76 Args: | 74 Args: |
| 77 calls: list of calls logged | 75 calls: list of calls logged |
| 78 """ | 76 """ |
| 79 index = 0 | 77 index = 0 |
| 80 last_timestamp_secs = -1 | 78 last_timestamp_secs = -1 |
| 81 last_timestamp_us = -1 | 79 last_timestamp_us = -1 |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 130 tracemap: the tracemap | 128 tracemap: the tracemap |
| 131 trace: the trace | 129 trace: the trace |
| 132 | 130 |
| 133 """ | 131 """ |
| 134 for trace_entry in trace: | 132 for trace_entry in trace: |
| 135 call = trace_entry[3] | 133 call = trace_entry[3] |
| 136 if (not call in tracemap) or ( | 134 if (not call in tracemap) or ( |
| 137 Timestamp(tracemap[call]) > Timestamp(trace_entry)): | 135 Timestamp(tracemap[call]) > Timestamp(trace_entry)): |
| 138 tracemap[call] = trace_entry | 136 tracemap[call] = trace_entry |
| 139 | 137 |
| 138 def GroupByProcessAndThreadId(sorted_trace): | |
|
pasko
2014/06/05 12:16:36
I still think my function is easier to read. It pa
Philippe
2014/06/05 12:38:52
Right :) I guess I had to see it to believe it. Th
| |
| 139 """Returns an array of traces grouped by pid and tid. | |
| 140 | |
| 141 This is used to make the order of functions not depend on thread-scheduling | |
| 142 which can be greatly impacted when profiling is done with cygprofile. As a | |
| 143 result each thread has its own contiguous segment of code (ordered by | |
| 144 timestamp) and processes also have their code isolated (i.e. not interleaved). | |
| 145 | |
| 146 This works by building a forest internally with three levels: | |
| 147 <PID 1> ..........................<PID N0> | |
| 148 / / | |
| 149 <TID 1:1>...<TID 1:N1> <TID N0:1>...<TID N0:N''> | |
| 150 / | |
| 151 <Trace 1:1:1>..<Trace 1:1:N2> | |
| 152 | |
| 153 The nodes are inserted in the forest by order of occurence in |sorted_trace| | |
| 154 which is expected to be sorted by timestamp. | |
| 155 | |
| 156 This forest is then flattened and returned as in: | |
| 157 <TID 1:1> <TID 1:2>...<TID 1:N>...<TID N':1> <TID N':2>...<TID N':N''> | |
| 158 """ | |
| 159 # Array of (pid, [(tid, [trace])]). Not that arrays are used (as opposed to | |
| 160 # (un)ordered maps) to preserve order. | |
| 161 trace_arrays_for_processes = [] | |
| 162 | |
| 163 def _GetTraceArray(pid, tid): | |
|
pasko
2014/06/05 11:28:49
This code operates on lists, there are no arrays.
Philippe
2014/06/05 12:38:52
Done.
| |
| 164 # TODO(pliard): This may require a cache if the linear search below turns | |
| 165 # out to be too slow. | |
| 166 found_process_index = -1 | |
|
pasko
2014/06/05 11:28:49
I am not a Python expert, but it seems more python
Philippe
2014/06/05 12:38:52
Done.
| |
| 167 | |
| 168 for i, (current_pid, current_trace_arrays) in enumerate( | |
| 169 trace_arrays_for_processes): | |
| 170 if current_pid != pid: | |
| 171 continue | |
| 172 | |
| 173 found_process_index = i | |
| 174 for (current_tid, trace_array) in current_trace_arrays: | |
| 175 if current_tid == tid: | |
| 176 return trace_array | |
| 177 break | |
| 178 | |
| 179 process_pair = None | |
|
pasko
2014/06/05 11:28:49
I would appreciate a comment hee saying something
Philippe
2014/06/05 12:38:52
Done.
| |
| 180 if found_process_index == -1: | |
| 181 process_pair = (pid, []) | |
| 182 trace_arrays_for_processes.append(process_pair) | |
| 183 else: | |
| 184 process_pair = trace_arrays_for_processes[found_process_index] | |
| 185 | |
| 186 (_, trace_arrays) = process_pair | |
| 187 new_array = [] | |
| 188 trace_arrays.append((tid, new_array)) | |
| 189 return new_array | |
| 190 | |
| 191 # Build the forest. | |
| 192 for trace in sorted_trace: | |
| 193 tokens = trace[2].split(':') | |
| 194 (pid, tid) = (int(tokens[0]), int(tokens[1])) | |
| 195 _GetTraceArray(pid, tid).append(trace) | |
| 196 | |
| 197 # Flatten the forest. | |
| 198 flat_trace_array = [] | |
| 199 for (_, current_trace_arrays) in trace_arrays_for_processes: | |
| 200 for (_, trace_array) in current_trace_arrays: | |
| 201 for trace in trace_array: | |
| 202 flat_trace_array.append(trace) | |
| 203 | |
| 204 assert not HasDuplicates(flat_trace_array) | |
| 205 | |
| 206 return flat_trace_array | |
| 207 | |
| 140 def main(): | 208 def main(): |
| 141 """Merge two traces for code in specified library and write to stdout. | 209 """Merge two traces for code in specified library and write to stdout. |
| 142 | 210 |
| 143 Merges the two traces and coverts the virtual addresses to the offsets in the | 211 Merges the two traces and coverts the virtual addresses to the offsets in the |
| 144 library. First line of merged trace has dummy virtual address of 0-ffffffff | 212 library. First line of merged trace has dummy virtual address of 0-ffffffff |
| 145 so that symbolizing the addresses uses the addresses in the log, since the | 213 so that symbolizing the addresses uses the addresses in the log, since the |
| 146 addresses have already been converted to static offsets. | 214 addresses have already been converted to static offsets. |
| 147 """ | 215 """ |
| 148 parser = optparse.OptionParser('usage: %prog trace1 ... traceN') | 216 parser = optparse.OptionParser('usage: %prog trace1 ... traceN') |
| 149 (_, args) = parser.parse_args() | 217 (_, args) = parser.parse_args() |
| 150 if len(args) <= 1: | 218 if len(args) <= 1: |
| 151 parser.error('expected at least the following args: trace1 trace2') | 219 parser.error('expected at least the following args: trace1 trace2') |
| 152 | 220 |
| 153 step = 0 | 221 step = 0 |
| 222 | |
| 223 # Maps function addresses to their corresponding trace entry. | |
| 154 tracemap = dict() | 224 tracemap = dict() |
| 225 | |
| 155 for trace_file in args: | 226 for trace_file in args: |
| 156 step += 1 | 227 step += 1 |
| 157 sys.stderr.write(" " + str(step) + "/" + str(len(args)) + | 228 sys.stderr.write(" " + str(step) + "/" + str(len(args)) + |
| 158 ": " + trace_file + ":\n") | 229 ": " + trace_file + ":\n") |
| 159 | 230 |
| 160 trace_lines = map(string.rstrip, open(trace_file).readlines()) | 231 trace_lines = map(string.rstrip, open(trace_file).readlines()) |
| 161 (trace_calls, trace_start, trace_end) = ParseLogLines(trace_lines) | 232 (trace_calls, trace_start, trace_end) = ParseLogLines(trace_lines) |
| 162 CheckTimestamps(trace_calls) | 233 CheckTimestamps(trace_calls) |
| 163 sys.stderr.write("Len: " + str(len(trace_calls)) + | 234 sys.stderr.write("Len: " + str(len(trace_calls)) + |
| 164 ". Start: " + hex(trace_start) + | 235 ". Start: " + hex(trace_start) + |
| 165 ", end: " + hex(trace_end) + '\n') | 236 ", end: " + hex(trace_end) + '\n') |
| 166 | 237 |
| 167 trace_calls = Convert(trace_calls, trace_start, trace_end) | 238 trace_calls = Convert(trace_calls, trace_start, trace_end) |
| 168 sys.stderr.write("Converted len: " + str(len(trace_calls)) + "\n") | 239 sys.stderr.write("Converted len: " + str(len(trace_calls)) + "\n") |
| 169 | 240 |
| 170 AddTrace(tracemap, trace_calls) | 241 AddTrace(tracemap, trace_calls) |
| 171 sys.stderr.write("Merged len: " + str(len(tracemap)) + "\n") | 242 sys.stderr.write("Merged len: " + str(len(tracemap)) + "\n") |
| 172 | 243 |
| 173 # Extract the resulting trace from the tracemap | 244 # Extract the resulting trace from the tracemap |
| 174 merged_trace = [] | 245 merged_trace = [] |
| 175 for call in tracemap: | 246 for call in tracemap: |
| 176 merged_trace.append(tracemap[call]) | 247 merged_trace.append(tracemap[call]) |
| 177 merged_trace.sort(key=Timestamp) | 248 merged_trace.sort(key=Timestamp) |
| 178 | 249 |
| 250 grouped_trace = GroupByProcessAndThreadId(merged_trace) | |
| 251 merged_trace = None | |
| 252 | |
| 179 print "0-ffffffff r-xp 00000000 xx:00 00000 ./" | 253 print "0-ffffffff r-xp 00000000 xx:00 00000 ./" |
| 180 print "secs\tusecs\tpid:threadid\tfunc" | 254 print "secs\tusecs\tpid:threadid\tfunc" |
| 181 for call in merged_trace: | 255 for call in grouped_trace: |
| 182 print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t" + | 256 print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t" + |
| 183 hex(call[3])) | 257 hex(call[3])) |
| 184 | 258 |
| 185 if __name__ == '__main__': | 259 if __name__ == '__main__': |
| 186 main() | 260 main() |
| OLD | NEW |