Chromium Code Reviews| Index: tools/cygprofile/mergetraces.py |
| diff --git a/tools/cygprofile/mergetraces.py b/tools/cygprofile/mergetraces.py |
| index 1c7627a8dff507408a3bcee8af0a030a45d8f6c3..e6d41095d18738fc42bc573208107eb7e95c3f7e 100755 |
| --- a/tools/cygprofile/mergetraces.py |
| +++ b/tools/cygprofile/mergetraces.py |
| @@ -13,9 +13,7 @@ create a single log that is an ordered trace of calls by both processes. |
| """ |
| import optparse |
| -import os |
| import string |
| -import subprocess |
| import sys |
| def ParseLogLines(lines): |
| @@ -55,7 +53,7 @@ def ParseLogLines(lines): |
| return (call_lines, vm_start, vm_end) |
| def HasDuplicates(calls): |
| - """Funcition is a sanity check to make sure that calls are only logged once. |
| + """Makes sure that calls are only logged once. |
| Args: |
| calls: list of calls logged |
| @@ -63,12 +61,12 @@ def HasDuplicates(calls): |
| Returns: |
| boolean indicating if calls has duplicate calls |
| """ |
| - seen = [] |
| + seen = set([]) |
| for call in calls: |
| if call[3] in seen: |
| - return true |
| - else: |
| - seen.append(call[3]) |
| + return True |
| + seen.add(call[3]) |
| + return False |
| def CheckTimestamps(calls): |
| """Prints warning to stderr if the call timestamps are not in order. |
| @@ -137,6 +135,76 @@ def AddTrace (tracemap, trace): |
| Timestamp(tracemap[call]) > Timestamp(trace_entry)): |
| tracemap[call] = trace_entry |
| +def GroupByProcessAndThreadId(sorted_trace): |
|
pasko
2014/06/05 12:16:36
I still think my function is easier to read. It pa
Philippe
2014/06/05 12:38:52
Right :) I guess I had to see it to believe it. Th
|
| + """Returns an array of traces grouped by pid and tid. |
| + |
| + This is used to make the order of functions not depend on thread-scheduling |
| + which can be greatly impacted when profiling is done with cygprofile. As a |
| + result each thread has its own contiguous segment of code (ordered by |
| + timestamp) and processes also have their code isolated (i.e. not interleaved). |
| + |
| + This works by building a forest internally with three levels: |
| + <PID 1> ..........................<PID N0> |
| + / / |
| + <TID 1:1>...<TID 1:N1> <TID N0:1>...<TID N0:N''> |
| + / |
| + <Trace 1:1:1>..<Trace 1:1:N2> |
| + |
| + The nodes are inserted in the forest by order of occurrence in |sorted_trace| |
| + which is expected to be sorted by timestamp. |
| + |
| + This forest is then flattened and returned as in: |
| + <TID 1:1> <TID 1:2>...<TID 1:N>...<TID N':1> <TID N':2>...<TID N':N''> |
| + """ |
| + # Array of (pid, [(tid, [trace])]). Note that arrays are used (as opposed to |
| + # (un)ordered maps) to preserve order. |
| + trace_arrays_for_processes = [] |
| + |
| + def _GetTraceArray(pid, tid): |
|
pasko
2014/06/05 11:28:49
This code operates on lists, there are no arrays.
Philippe
2014/06/05 12:38:52
Done.
|
| + # TODO(pliard): This may require a cache if the linear search below turns |
| + # out to be too slow. |
| + found_process_index = -1 |
|
pasko
2014/06/05 11:28:49
I am not a Python expert, but it seems more python
Philippe
2014/06/05 12:38:52
Done.
|
| + |
| + for i, (current_pid, current_trace_arrays) in enumerate( |
| + trace_arrays_for_processes): |
| + if current_pid != pid: |
| + continue |
| + |
| + found_process_index = i |
| + for (current_tid, trace_array) in current_trace_arrays: |
| + if current_tid == tid: |
| + return trace_array |
| + break |
| + |
| + process_pair = None |
|
pasko
2014/06/05 11:28:49
I would appreciate a comment here saying something
Philippe
2014/06/05 12:38:52
Done.
|
| + if found_process_index == -1: |
| + process_pair = (pid, []) |
| + trace_arrays_for_processes.append(process_pair) |
| + else: |
| + process_pair = trace_arrays_for_processes[found_process_index] |
| + |
| + (_, trace_arrays) = process_pair |
| + new_array = [] |
| + trace_arrays.append((tid, new_array)) |
| + return new_array |
| + |
| + # Build the forest. |
| + for trace in sorted_trace: |
| + tokens = trace[2].split(':') |
| + (pid, tid) = (int(tokens[0]), int(tokens[1])) |
| + _GetTraceArray(pid, tid).append(trace) |
| + |
| + # Flatten the forest. |
| + flat_trace_array = [] |
| + for (_, current_trace_arrays) in trace_arrays_for_processes: |
| + for (_, trace_array) in current_trace_arrays: |
| + for trace in trace_array: |
| + flat_trace_array.append(trace) |
| + |
| + assert not HasDuplicates(flat_trace_array) |
| + |
| + return flat_trace_array |
| + |
| def main(): |
| """Merge two traces for code in specified library and write to stdout. |
| @@ -151,7 +219,10 @@ def main(): |
| parser.error('expected at least the following args: trace1 trace2') |
| step = 0 |
| + |
| + # Maps function addresses to their corresponding trace entry. |
| tracemap = dict() |
| + |
| for trace_file in args: |
| step += 1 |
| sys.stderr.write(" " + str(step) + "/" + str(len(args)) + |
| @@ -176,9 +247,12 @@ def main(): |
| merged_trace.append(tracemap[call]) |
| merged_trace.sort(key=Timestamp) |
| + grouped_trace = GroupByProcessAndThreadId(merged_trace) |
| + merged_trace = None |
| + |
| print "0-ffffffff r-xp 00000000 xx:00 00000 ./" |
| print "secs\tusecs\tpid:threadid\tfunc" |
| - for call in merged_trace: |
| + for call in grouped_trace: |
| print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t" + |
| hex(call[3])) |