Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Unified Diff: tools/ll_prof.py

Issue 3831002: Support profiling based on linux kernel performance events. (Closed)
Patch Set: More fixes Created 10 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/platform-win32.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/ll_prof.py
diff --git a/tools/ll_prof.py b/tools/ll_prof.py
new file mode 100755
index 0000000000000000000000000000000000000000..563084ddfca1b901896b71b5316fa0ab59c23299
--- /dev/null
+++ b/tools/ll_prof.py
@@ -0,0 +1,955 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import bisect
+import collections
+import ctypes
+import mmap
+import optparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import time
+
+
# optparse usage/help text; %prog is replaced with the script name.
USAGE="""usage: %prog [OPTION]...

Analyses V8 and perf logs to produce profiles.

Perf logs can be collected using a command like:
  $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof
  # -R: collect all data
  # -e cycles: use cpu-cycles event (run "perf list" for details)
  # -c 10000: write a sample after each 10000 events
  # -f: force output file overwrite
  # -i: limit profiling to our process and the kernel
  # --ll-prof shell flag enables the right V8 logs
This will produce a binary trace file (perf.data) that %prog can analyse.

Examples:
  # Print flat profile with annotated disassembly for the 10 top
  # symbols. Use default log names and include the snapshot log.
  $ %prog --snapshot --disasm-top=10

  # Print flat profile with annotated disassembly for all used symbols.
  # Use default log names and include kernel symbols into analysis.
  $ %prog --disasm-all --kernel

  # Print flat profile. Use custom log names.
  $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
"""


# Must match kGcFakeMmap.
# V8 signals a moving GC to perf by mmapping a fake file with this name.
V8_GC_FAKE_MMAP = "/tmp/__v8_gc__"

# Origin tags for generated code: compiled from JS source at runtime
# versus deserialized from the heap snapshot.
JS_ORIGIN = "js"
JS_SNAPSHOT_ORIGIN = "js-snapshot"

# Avoid using the slow (google-specific) wrapper around objdump.
OBJDUMP_BIN = "/usr/bin/objdump"
if not os.path.exists(OBJDUMP_BIN):
  # Fall back to whatever objdump is on PATH.
  OBJDUMP_BIN = "objdump"
+
+
+class Code(object):
+ """Code object."""
+
+ _COMMON_DISASM_OPTIONS = ["-M", "intel-mnemonic", "-C"]
+
+ _DISASM_HEADER_RE = re.compile(r"[a-f0-9]+\s+<.*:$")
+ _DISASM_LINE_RE = re.compile(r"\s*([a-f0-9]+):.*")
+
+ # Keys must match constants in Logger::LogCodeInfo.
+ _ARCH_MAP = {
+ "ia32": "-m i386",
+ "x64": "-m i386 -M x86-64",
+ "arm": "-m arm" # Not supported by our objdump build.
+ }
+
+ _id = 0
+
+ def __init__(self, name, start_address, end_address, origin, origin_offset):
+ self.id = Code._id
+ Code._id += 1
+ self.name = name
+ self.other_names = None
+ self.start_address = start_address
+ self.end_address = end_address
+ self.origin = origin
+ self.origin_offset = origin_offset
+ self.self_ticks = 0
+ self.self_ticks_map = None
+ self.callee_ticks = None
+
+ def AddName(self, name):
+ assert self.name != name
+ if self.other_names is None:
+ self.other_names = [name]
+ return
+ if not name in self.other_names:
+ self.other_names.append(name)
+
+ def FullName(self):
+ if self.other_names is None:
+ return self.name
+ self.other_names.sort()
+ return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
+
+ def IsUsed(self):
+ return self.self_ticks > 0 or self.callee_ticks is not None
+
+ def Tick(self, pc):
+ self.self_ticks += 1
+ if self.self_ticks_map is None:
+ self.self_ticks_map = collections.defaultdict(lambda: 0)
+ offset = pc - self.start_address
+ self.self_ticks_map[offset] += 1
+
+ def CalleeTick(self, callee):
+ if self.callee_ticks is None:
+ self.callee_ticks = collections.defaultdict(lambda: 0)
+ self.callee_ticks[callee] += 1
+
+ def PrintAnnotated(self, code_info, options):
+ if self.self_ticks_map is None:
+ ticks_map = []
+ else:
+ ticks_map = self.self_ticks_map.items()
+ # Convert the ticks map to offsets and counts arrays so that later
+ # we can do binary search in the offsets array.
+ ticks_map.sort(key=lambda t: t[0])
+ ticks_offsets = [t[0] for t in ticks_map]
+ ticks_counts = [t[1] for t in ticks_map]
+ # Get a list of disassembled lines and their addresses.
+ lines = []
+ for line in self._GetDisasmLines(code_info, options):
+ match = Code._DISASM_LINE_RE.match(line)
+ if match:
+ line_address = int(match.group(1), 16)
+ lines.append((line_address, line))
+ if len(lines) == 0:
+ return
+ # Print annotated lines.
+ address = lines[0][0]
+ total_count = 0
+ for i in xrange(len(lines)):
+ start_offset = lines[i][0] - address
+ if i == len(lines) - 1:
+ end_offset = self.end_address - self.start_address
+ else:
+ end_offset = lines[i + 1][0] - address
+ # Ticks (reported pc values) are not always precise, i.e. not
+ # necessarily point at instruction starts. So we have to search
+ # for ticks that touch the current instruction line.
+ j = bisect.bisect_left(ticks_offsets, end_offset)
+ count = 0
+ for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
+ if offset < start_offset:
+ break
+ count += cnt
+ total_count += count
+ count = 100.0 * count / self.self_ticks
+ if count >= 0.01:
+ print "%15.2f %s" % (count, lines[i][1])
+ else:
+ print "%s %s" % (" " * 15, lines[i][1])
+ print
+ assert total_count == self.self_ticks, \
+ "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
+
+ def __str__(self):
+ return "%s [0x%x, 0x%x) size: %d origin: %s" % (
+ self.name,
+ self.start_address,
+ self.end_address,
+ self.end_address - self.start_address,
+ self.origin)
+
+ def _GetDisasmLines(self, code_info, options):
+ tmp_name = None
+ if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
+ assert code_info.arch in Code._ARCH_MAP, \
+ "Unsupported architecture '%s'" % arch
+ arch_flags = Code._ARCH_MAP[code_info.arch]
+ # Create a temporary file just with this code object.
+ tmp_name = tempfile.mktemp(".v8code")
+ size = self.end_address - self.start_address
+ command = "dd if=%s.code of=%s bs=1 count=%d skip=%d && " \
+ "%s %s -D -b binary %s %s" % (
+ options.log, tmp_name, size, self.origin_offset,
+ OBJDUMP_BIN, ' '.join(Code._COMMON_DISASM_OPTIONS), arch_flags,
+ tmp_name)
+ else:
+ command = "%s %s --start-address=%d --stop-address=%d -d %s " % (
+ OBJDUMP_BIN, ' '.join(Code._COMMON_DISASM_OPTIONS),
+ self.origin_offset,
+ self.origin_offset + self.end_address - self.start_address,
+ self.origin)
+ process = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT)
+ out, err = process.communicate()
+ lines = out.split("\n")
+ header_line = 0
+ for i, line in enumerate(lines):
+ if Code._DISASM_HEADER_RE.match(line):
+ header_line = i
+ break
+ if tmp_name:
+ os.unlink(tmp_name)
+ return lines[header_line + 1:]
+
+
class CodePage(object):
  """A fixed-size (4K) page grouping adjacent code objects for lookup."""

  SHIFT = 12  # 4K pages
  SIZE = (1 << SHIFT)
  MASK = ~(SIZE - 1)

  @staticmethod
  def PageAddress(address):
    """Rounds address down to the start of its page."""
    return address & CodePage.MASK

  @staticmethod
  def PageId(address):
    """Returns the index of the page containing address."""
    return address >> CodePage.SHIFT

  @staticmethod
  def PageAddressFromId(id):
    """Returns the start address of the page with the given index."""
    return id << CodePage.SHIFT

  def __init__(self, address):
    self.address = address
    self.code_objects = []

  def Add(self, code):
    self.code_objects.append(code)

  def Remove(self, code):
    self.code_objects.remove(code)

  def Find(self, pc):
    """Returns the code object covering pc (moved to the front), or None."""
    objects = self.code_objects
    for index, candidate in enumerate(objects):
      if candidate.start_address <= pc < candidate.end_address:
        # Move-to-front heuristic: consecutive ticks tend to hit the
        # same code object.
        objects[0], objects[index] = candidate, objects[0]
        return candidate
    return None

  def __iter__(self):
    return iter(self.code_objects)
+
+
+class CodeMap(object):
+ """Code object map."""
+
+ def __init__(self):
+ self.pages = {}
+ self.min_address = 1 << 64
+ self.max_address = -1
+
+ def Add(self, code, max_pages=-1):
+ page_id = CodePage.PageId(code.start_address)
+ limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
+ pages = 0
+ while page_id < limit_id:
+ if max_pages >= 0 and pages > max_pages:
+ print >>sys.stderr, \
+ "Warning: page limit (%d) reached for %s [%s]" % (
+ max_pages, code.name, code.origin)
+ break
+ if page_id in self.pages:
+ page = self.pages[page_id]
+ else:
+ page = CodePage(CodePage.PageAddressFromId(page_id))
+ self.pages[page_id] = page
+ page.Add(code)
+ page_id += 1
+ pages += 1
+ self.min_address = min(self.min_address, code.start_address)
+ self.max_address = max(self.max_address, code.end_address)
+
+ def Remove(self, code):
+ page_id = CodePage.PageId(code.start_address)
+ limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
+ removed = False
+ while page_id < limit_id:
+ if page_id not in self.pages:
+ page_id += 1
+ continue
+ page = self.pages[page_id]
+ page.Remove(code)
+ removed = True
+ page_id += 1
+ return removed
+
+ def AllCode(self):
+ for page in self.pages.itervalues():
+ for code in page:
+ if CodePage.PageAddress(code.start_address) == page.address:
+ yield code
+
+ def UsedCode(self):
+ for code in self.AllCode():
+ if code.IsUsed():
+ yield code
+
+ def Print(self):
+ for code in self.AllCode():
+ print code
+
+ def Find(self, pc):
+ if pc < self.min_address or pc >= self.max_address:
+ return None
+ page_id = CodePage.PageId(pc)
+ if page_id not in self.pages:
+ return None
+ return self.pages[page_id].Find(pc)
+
+
class CodeInfo(object):
  """Generic info about generated code objects read from the V8 log."""

  def __init__(self, arch, header_size):
    # Target architecture string (a key of Code._ARCH_MAP).
    self.arch = arch
    # Size in bytes of the header preceding a code object's instructions.
    self.header_size = header_size
+
+
class CodeLogReader(object):
  """V8 code event log reader.

  Parses code-creation/move/delete and snapshot-pos events, keeping
  the code map up to date and correlating snapshot positions with
  code names across the snapshot and main logs.
  """

  _CODE_INFO_RE = re.compile(
    r"code-info,([^,]+),(\d+)")

  _CODE_CREATE_RE = re.compile(
    r"code-creation,([^,]+),(0x[a-f0-9]+),(\d+),\"([^\"]*)\"(?:,(\d+))?")

  _CODE_MOVE_RE = re.compile(
    r"code-move,(0x[a-f0-9]+),(0x[a-f0-9]+)")

  _CODE_DELETE_RE = re.compile(
    r"code-delete,(0x[a-f0-9]+)")

  _SNAPSHOT_POS_RE = re.compile(
    r"snapshot-pos,(0x[a-f0-9]+),(\d+)")

  _CODE_MOVING_GC = "code-moving-gc"

  def __init__(self, log_name, code_map, is_snapshot, snapshot_pos_to_name):
    self.log = open(log_name, "r")
    self.code_map = code_map
    # True when reading the snapshot log (fills snapshot_pos_to_name).
    self.is_snapshot = is_snapshot
    # Shared map from snapshot position to code name.
    self.snapshot_pos_to_name = snapshot_pos_to_name
    # Maps addresses of snapshot code in this log to snapshot names;
    # cleared on every moving GC since addresses become stale.
    self.address_to_snapshot_name = {}

  def ReadCodeInfo(self):
    """Reads the leading code-info line; asserts if it is missing."""
    line = self.log.readline() or ""
    match = CodeLogReader._CODE_INFO_RE.match(line)
    assert match, "No code info in log"
    return CodeInfo(arch=match.group(1), header_size=int(match.group(2)))

  def ReadUpToGC(self, code_info):
    """Processes log lines up to the next moving GC (or EOF).

    Returns True if at least one line was consumed.
    """
    made_progress = False
    code_header_size = code_info.header_size
    while True:
      line = self.log.readline()
      if not line:
        return made_progress
      made_progress = True

      if line.startswith(CodeLogReader._CODE_MOVING_GC):
        self.address_to_snapshot_name.clear()
        return made_progress

      match = CodeLogReader._CODE_CREATE_RE.match(line)
      if match:
        # Addresses are adjusted by the header size so they point at
        # the first instruction rather than the object header.
        start_address = int(match.group(2), 16) + code_header_size
        end_address = start_address + int(match.group(3)) - code_header_size
        if start_address in self.address_to_snapshot_name:
          name = self.address_to_snapshot_name[start_address]
          origin = JS_SNAPSHOT_ORIGIN
        else:
          name = "%s:%s" % (match.group(1), match.group(4))
          origin = JS_ORIGIN
        if self.is_snapshot:
          origin_offset = 0
        else:
          origin_offset = int(match.group(5))
        code = Code(name, start_address, end_address, origin, origin_offset)
        # Typo fix: local was previously named "conficting_code".
        conflicting_code = self.code_map.Find(start_address)
        if conflicting_code:
          CodeLogReader._HandleCodeConflict(conflicting_code, code)
          # TODO(vitalyr): this warning is too noisy because of our
          # attempts to reconstruct code log from the snapshot.
          # print >>sys.stderr, \
          #   "Warning: Skipping duplicate code log entry %s" % code
          continue
        self.code_map.Add(code)
        continue

      match = CodeLogReader._CODE_MOVE_RE.match(line)
      if match:
        old_start_address = int(match.group(1), 16) + code_header_size
        new_start_address = int(match.group(2), 16) + code_header_size
        if old_start_address == new_start_address:
          # Skip useless code move entries.
          continue
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
          "Inexact move address %x for %s" % (old_start_address, code)
        self.code_map.Remove(code)
        size = code.end_address - code.start_address
        code.start_address = new_start_address
        code.end_address = new_start_address + size
        self.code_map.Add(code)
        continue

      match = CodeLogReader._CODE_DELETE_RE.match(line)
      if match:
        old_start_address = int(match.group(1), 16) + code_header_size
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
          "Inexact delete address %x for %s" % (old_start_address, code)
        self.code_map.Remove(code)
        continue

      match = CodeLogReader._SNAPSHOT_POS_RE.match(line)
      if match:
        start_address = int(match.group(1), 16) + code_header_size
        snapshot_pos = int(match.group(2))
        if self.is_snapshot:
          # Snapshot log: remember the name for this snapshot position.
          code = self.code_map.Find(start_address)
          if code:
            assert code.start_address == start_address, \
              "Inexact snapshot address %x for %s" % (start_address, code)
            self.snapshot_pos_to_name[snapshot_pos] = code.name
        else:
          # Main log: map the deserialized address to the snapshot name.
          if snapshot_pos in self.snapshot_pos_to_name:
            self.address_to_snapshot_name[start_address] = \
              self.snapshot_pos_to_name[snapshot_pos]

  def Dispose(self):
    self.log.close()

  @staticmethod
  def _HandleCodeConflict(old_code, new_code):
    """Checks duplicate log entries agree and merges their names."""
    # Typo fix: the assert message previously read "Conficting".
    assert (old_code.start_address == new_code.start_address and
            old_code.end_address == new_code.end_address), \
      "Conflicting code log entries %s and %s" % (old_code, new_code)
    CodeLogReader._UpdateNames(old_code, new_code)

  @staticmethod
  def _UpdateNames(old_code, new_code):
    """Adds new_code's name as an alias of old_code when they differ."""
    if old_code.name == new_code.name:
      return
    # Kludge: there are code objects with custom names that don't
    # match their flags.
    misnamed_code = set(["Builtin:CpuFeatures::Probe"])
    if old_code.name in misnamed_code:
      return
    # Code object may be shared by a few functions. Collect the full
    # set of names.
    old_code.AddName(new_code.name)
+
+
class Descriptor(object):
  """Describes a fixed-layout binary record in the perf trace log."""

  # Maps the log's type mnemonics to the matching ctypes types.
  CTYPE_MAP = {
    "u16": ctypes.c_uint16,
    "u32": ctypes.c_uint32,
    "u64": ctypes.c_uint64
  }

  def __init__(self, fields):
    """Builds a ctypes.Structure subclass from (name, format) pairs."""
    class TraceItem(ctypes.Structure):
      _fields_ = Descriptor.CtypesFields(fields)

      def __str__(self):
        parts = ("%s: %s" % (name, self.__getattribute__(name))
                 for name, _ in TraceItem._fields_)
        return ", ".join(parts)

    self.ctype = TraceItem

  def Read(self, trace, offset):
    """Overlays the structure onto the buffer at offset (no copy)."""
    return self.ctype.from_buffer(trace, offset)

  @staticmethod
  def CtypesFields(fields):
    """Converts (name, format) pairs into ctypes _fields_ entries."""
    result = []
    for name, format in fields:
      result.append((name, Descriptor.CTYPE_MAP[format]))
    return result
+
+
+# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
+# for the gory details.
+
+
# Layout of the perf.data file header.
TRACE_HEADER_DESC = Descriptor([
  ("magic", "u64"),
  ("size", "u64"),
  ("attr_size", "u64"),
  ("attrs_offset", "u64"),
  ("attrs_size", "u64"),
  ("data_offset", "u64"),
  ("data_size", "u64"),
  ("event_types_offset", "u64"),
  ("event_types_size", "u64")
])


# Layout of a perf_event_attr record describing how events were
# collected; sample_type fixes the layout of every sample event.
PERF_EVENT_ATTR_DESC = Descriptor([
  ("type", "u32"),
  ("size", "u32"),
  ("config", "u64"),
  ("sample_period_or_freq", "u64"),
  ("sample_type", "u64"),
  ("read_format", "u64"),
  ("flags", "u64"),
  ("wakeup_events_or_watermark", "u32"),
  ("bt_type", "u32"),
  ("bp_addr", "u64"),
  ("bp_len", "u64"),
])


# Common header preceding every event record in the data section.
PERF_EVENT_HEADER_DESC = Descriptor([
  ("type", "u32"),
  ("misc", "u16"),
  ("size", "u16")
])


# Body of a PERF_RECORD_MMAP event; a null-padded file name follows it.
PERF_MMAP_EVENT_BODY_DESC = Descriptor([
  ("pid", "u32"),
  ("tid", "u32"),
  ("addr", "u64"),
  ("len", "u64"),
  ("pgoff", "u64")
])


# perf_event_attr.sample_type bits control the set of
# perf_sample_event fields.
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10


# Possible fields of a sample event body, each present only when its
# sample_type bit (third element) is set in the trace's attributes.
PERF_SAMPLE_EVENT_BODY_FIELDS = [
  ("ip", "u64", PERF_SAMPLE_IP),
  ("pid", "u32", PERF_SAMPLE_TID),
  ("tid", "u32", PERF_SAMPLE_TID),
  ("time", "u64", PERF_SAMPLE_TIME),
  ("addr", "u64", PERF_SAMPLE_ADDR),
  ("id", "u64", PERF_SAMPLE_ID),
  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
  ("cpu", "u32", PERF_SAMPLE_CPU),
  ("res", "u32", PERF_SAMPLE_CPU),
  ("period", "u64", PERF_SAMPLE_PERIOD),
  # Don't want to handle read format that comes after the period and
  # before the callchain and has variable size.
  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
  # Raw data follows the callchain and is ignored.
]


# Type of each entry in a sample's callchain (array of length "nr").
PERF_SAMPLE_EVENT_IP_FORMAT = "u64"


# perf_event_header.type values handled by this script.
PERF_RECORD_MMAP = 1
PERF_RECORD_SAMPLE = 9
+
+
class TraceReader(object):
  """Perf (linux-2.6/tools/perf) trace file reader."""

  # Expected magic number at the start of a perf.data file.
  _TRACE_HEADER_MAGIC = 4993446653023372624

  def __init__(self, trace_name):
    # Bug fix: open in binary mode; the trace is raw binary data that
    # is mmapped and overlaid with ctypes structures.
    self.trace_file = open(trace_name, "rb")
    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
      print >>sys.stderr, "Warning: unsupported trace header magic"
    self.offset = self.trace_header.data_offset
    self.limit = self.trace_header.data_offset + self.trace_header.data_size
    assert self.limit <= self.trace.size(), \
        "Trace data limit exceeds trace file size"
    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
    assert self.trace_header.attrs_size != 0, \
        "No perf event attributes found in the trace"
    # Only the first attribute entry is read; all samples are assumed
    # to share its sample_type layout.
    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
                                                self.trace_header.attrs_offset)
    self.sample_event_body_desc = self._SampleEventBodyDesc(
        perf_event_attr.sample_type)
    self.callchain_supported = \
        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
    if self.callchain_supported:
      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
      self.ip_size = ctypes.sizeof(self.ip_struct)

  def ReadEventHeader(self):
    """Reads the next event header; returns (None, 0) at end of data."""
    if self.offset >= self.limit:
      return None, 0
    offset = self.offset
    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
    self.offset += header.size
    return header, offset

  def ReadMmap(self, header, offset):
    """Reads an mmap event body and attaches the mapped file name."""
    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
                                               offset + self.header_size)
    # Read null-padded filename.
    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
                          offset + header.size].rstrip(chr(0))
    mmap_info.filename = filename
    return mmap_info

  def ReadSample(self, header, offset):
    """Reads a sample event body, including the callchain if present."""
    sample = self.sample_event_body_desc.Read(self.trace,
                                              offset + self.header_size)
    if not self.callchain_supported:
      return sample
    # The callchain (sample.nr ip entries) follows the fixed body.
    sample.ips = []
    offset += self.header_size + ctypes.sizeof(sample)
    for _ in xrange(sample.nr):
      sample.ips.append(
        self.ip_struct.from_buffer(self.trace, offset).value)
      offset += self.ip_size
    return sample

  def Dispose(self):
    self.trace.close()
    self.trace_file.close()

  def _SampleEventBodyDesc(self, sample_type):
    """Builds the sample body descriptor for the trace's sample_type."""
    # Typo fix: message previously read "hande".
    assert (sample_type & PERF_SAMPLE_READ) == 0, \
        "Can't handle read format in samples"
    fields = [(field, format)
              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
              if (bit & sample_type) != 0]
    return Descriptor(fields)
+
+
# Matches the first line of an objdump section header and captures the
# section name (the CODE/RELOC attributes are on the following line).
OBJDUMP_SECTION_HEADER_RE = re.compile(
  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
# Matches an objdump symbol table line: address, 7 flag characters,
# section, size and (possibly .hidden-prefixed) name.
OBJDUMP_SYMBOL_LINE_RE = re.compile(
  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
# Marks the start of the dynamic symbol table in objdump -T output.
OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
  r"^DYNAMIC SYMBOL TABLE")
KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
# Matches the pseudo file name perf reports for kernel symbol mmaps.
PERF_KERNEL_ALLSYMS_RE = re.compile(
  r".*kallsyms.*")
# Matches a kallsyms text-symbol (t/T) line: address then name.
KERNEL_ALLSYMS_LINE_RE = re.compile(
  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
+
+
class LibraryRepo(object):
  """Tracks mmapped libraries/executables, their symbols and ticks."""

  def __init__(self):
    # Mmap infos; Tick keeps the most recently hit one in front.
    self.infos = []
    # Unique display names handed out so far.
    self.names = set()
    self.ticks = {}

  def Load(self, mmap_info, code_map, options):
    """Loads symbols for an mmapped region into code_map.

    Returns True on success (including deliberate skips), False when
    kernel symbols were requested but could not be loaded.
    """
    # Skip kernel mmaps when requested using the fact that their tid
    # is 0.
    if mmap_info.tid == 0 and not options.kernel:
      return True
    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
      return self._LoadKernelSymbols(code_map)
    self.infos.append(mmap_info)
    mmap_info.ticks = 0
    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
    if not os.path.exists(mmap_info.filename):
      return True
    # Request section headers (-h), symbols (-t), and dynamic symbols
    # (-T) from objdump.
    # Unfortunately, section headers span two lines, so we have to
    # keep the just seen section name (from the first line in each
    # section header) in the after_section variable.
    process = subprocess.Popen(
      "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    pipe = process.stdout
    after_section = None
    code_sections = set()
    reloc_sections = set()
    dynamic = False
    try:
      for line in pipe:
        if after_section:
          if line.find("CODE") != -1:
            code_sections.add(after_section)
          if line.find("RELOC") != -1:
            reloc_sections.add(after_section)
          after_section = None
          continue

        match = OBJDUMP_SECTION_HEADER_RE.match(line)
        if match:
          after_section = match.group(1)
          continue

        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
          dynamic = True
          continue

        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
        if match:
          start_address = int(match.group(1), 16)
          origin_offset = start_address
          flags = match.group(2)
          section = match.group(3)
          if section in code_sections:
            if dynamic or section in reloc_sections:
              # Addresses of dynamic/relocated symbols are relative to
              # the mmapped region start.
              start_address += mmap_info.addr
            size = int(match.group(4), 16)
            name = match.group(5)
            origin = mmap_info.filename
            code_map.Add(Code(name, start_address, start_address + size,
                              origin, origin_offset))
    finally:
      pipe.close()
      assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
    # Bug fix: previously fell off the end returning None; return True
    # for consistency with the other success paths.
    return True

  def Tick(self, pc):
    """Attributes a tick at pc to its containing library, if any."""
    for i, mmap_info in enumerate(self.infos):
      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
        mmap_info.ticks += 1
        # Move-to-front: consecutive ticks tend to hit the same library.
        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
        return True
    return False

  def _UniqueMmapName(self, mmap_info):
    """Disambiguates repeated file names by appending a counter."""
    name = mmap_info.filename
    index = 1
    while name in self.names:
      name = "%s-%d" % (mmap_info.filename, index)
      index += 1
    self.names.add(name)
    return name

  def _LoadKernelSymbols(self, code_map):
    """Loads kernel text symbols from /proc/kallsyms into code_map."""
    if not os.path.exists(KERNEL_ALLSYMS_FILE):
      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
      return False
    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
    try:
      code = None
      for line in kallsyms:
        match = KERNEL_ALLSYMS_LINE_RE.match(line)
        if match:
          start_address = int(match.group(1), 16)
          end_address = start_address
          name = match.group(2)
          if code:
            # A symbol's size is only known once the next symbol's
            # start is seen; the final symbol is dropped (end unknown).
            code.end_address = start_address
            code_map.Add(code, 16)
          code = Code(name, start_address, end_address, "kernel", 0)
    finally:
      # Bug fix: the kallsyms file was previously left open.
      kallsyms.close()
    return True
+
+
+def PrintReport(code_map, library_repo, code_info, options):
+ print "Ticks per symbol:"
+ used_code = [code for code in code_map.UsedCode()]
+ used_code.sort(key=lambda x: x.self_ticks, reverse=True)
+ for i, code in enumerate(used_code):
+ print "%10d %s [%s]" % (code.self_ticks, code.FullName(), code.origin)
+ if options.disasm_all or i < options.disasm_top:
+ code.PrintAnnotated(code_info, options)
+ print
+ print "Ticks per library:"
+ mmap_infos = [m for m in library_repo.infos]
+ mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
+ for mmap_info in mmap_infos:
+ print "%10d %s" % (mmap_info.ticks, mmap_info.unique_name)
+
+
+def PrintDot(code_map, options):
+ print "digraph G {"
+ for code in code_map.UsedCode():
+ if code.self_ticks < 10:
+ continue
+ print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
+ if code.callee_ticks:
+ for callee, ticks in code.callee_ticks.iteritems():
+ print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
+ print "}"
+
+
+if __name__ == "__main__":
+ parser = optparse.OptionParser(USAGE)
+ parser.add_option("--snapshot-log",
+ default="obj/release/snapshot.log",
+ help="V8 snapshot log file name [default: %default]")
+ parser.add_option("--log",
+ default="v8.log",
+ help="V8 log file name [default: %default]")
+ parser.add_option("--snapshot",
+ default=False,
+ action="store_true",
+ help="process V8 snapshot log [default: %default]")
+ parser.add_option("--trace",
+ default="perf.data",
+ help="perf trace file name [default: %default]")
+ parser.add_option("--kernel",
+ default=False,
+ action="store_true",
+ help="process kernel entries [default: %default]")
+ parser.add_option("--disasm-top",
+ default=0,
+ type="int",
+ help=("number of top symbols to disassemble and annotate "
+ "[default: %default]"))
+ parser.add_option("--disasm-all",
+ default=False,
+ action="store_true",
+ help=("disassemble and annotate all used symbols "
+ "[default: %default]"))
+ parser.add_option("--dot",
+ default=False,
+ action="store_true",
+ help="produce dot output (WIP) [default: %default]")
+ parser.add_option("--quiet", "-q",
+ default=False,
+ action="store_true",
+ help="no auxiliary messages [default: %default]")
+ options, args = parser.parse_args()
+
+ if not options.quiet:
+ if options.snapshot:
+ print "V8 logs: %s, %s, %s.code" % (options.snapshot_log,
+ options.log,
+ options.log)
+ else:
+ print "V8 log: %s, %s.code (no snapshot)" % (options.log, options.log)
+ print "Perf trace file: %s" % options.trace
+
+ # Stats.
+ events = 0
+ ticks = 0
+ missed_ticks = 0
+ really_missed_ticks = 0
+ mmap_time = 0
+ sample_time = 0
+
+ # Initialize the log reader and get the code info.
+ code_map = CodeMap()
+ snapshot_name_map = {}
+ log_reader = CodeLogReader(log_name=options.log,
+ code_map=code_map,
+ is_snapshot=False,
+ snapshot_pos_to_name=snapshot_name_map)
+ code_info = log_reader.ReadCodeInfo()
+ if not options.quiet:
+ print "Generated code architecture: %s" % code_info.arch
+ print
+
+ # Process the snapshot log to fill the snapshot name map.
+ if options.snapshot:
+ snapshot_log_reader = CodeLogReader(log_name=options.snapshot_log,
+ code_map=CodeMap(),
+ is_snapshot=True,
+ snapshot_pos_to_name=snapshot_name_map)
+ while snapshot_log_reader.ReadUpToGC(code_info):
+ pass
+
+ # Process the code and trace logs.
+ library_repo = LibraryRepo()
+ log_reader.ReadUpToGC(code_info)
+ trace_reader = TraceReader(options.trace)
+ while True:
+ header, offset = trace_reader.ReadEventHeader()
+ if not header:
+ break
+ events += 1
+ if header.type == PERF_RECORD_MMAP:
+ start = time.time()
+ mmap_info = trace_reader.ReadMmap(header, offset)
+ if mmap_info.filename == V8_GC_FAKE_MMAP:
+ log_reader.ReadUpToGC()
+ else:
+ library_repo.Load(mmap_info, code_map, options)
+ mmap_time += time.time() - start
+ elif header.type == PERF_RECORD_SAMPLE:
+ ticks += 1
+ start = time.time()
+ sample = trace_reader.ReadSample(header, offset)
+ code = code_map.Find(sample.ip)
+ if code:
+ code.Tick(sample.ip)
+ else:
+ missed_ticks += 1
+ if not library_repo.Tick(sample.ip) and not code:
+ really_missed_ticks += 1
+ if trace_reader.callchain_supported:
+ for ip in sample.ips:
+ caller_code = code_map.Find(ip)
+ if caller_code:
+ if code:
+ caller_code.CalleeTick(code)
+ code = caller_code
+ sample_time += time.time() - start
+
+ if options.dot:
+ PrintDot(code_map, options)
+ else:
+ PrintReport(code_map, library_repo, code_info, options)
+
+ if not options.quiet:
+ print
+ print "Stats:"
+ print "%10d total trace events" % events
+ print "%10d total ticks" % ticks
+ print "%10d ticks not in symbols" % missed_ticks
+ print "%10d unaccounted ticks" % really_missed_ticks
+ print "%10d total symbols" % len([c for c in code_map.AllCode()])
+ print "%10d used symbols" % len([c for c in code_map.UsedCode()])
+ print "%9.2fs library processing time" % mmap_time
+ print "%9.2fs tick processing time" % sample_time
+
+ log_reader.Dispose()
+ trace_reader.Dispose()
« no previous file with comments | « src/platform-win32.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698