Chromium Code Reviews

Side by Side Diff: tools/ll_prof.py

Issue 3831002: Support profiling based on linux kernel performance events. (Closed)
Patch Set: More fixes Created 10 years, 2 months ago
1 #!/usr/bin/env python
2 #
3 # Copyright 2010 the V8 project authors. All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following
12 # disclaimer in the documentation and/or other materials provided
13 # with the distribution.
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived
16 # from this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 import bisect
31 import collections
32 import ctypes
33 import mmap
34 import optparse
35 import os
36 import re
37 import subprocess
38 import sys
39 import tempfile
40 import time
41
42
43 USAGE="""usage: %prog [OPTION]...
44
45 Analyses V8 and perf logs to produce profiles.
46
47 Perf logs can be collected using a command like:
48 $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof
49 # -R: collect all data
50 # -e cycles: use cpu-cycles event (run "perf list" for details)
51 # -c 10000: write a sample after each 10000 events
52 # -f: force output file overwrite
53 # -i: limit profiling to our process and the kernel
54 # --ll-prof shell flag enables the right V8 logs
55 This will produce a binary trace file (perf.data) that %prog can analyse.
56
57 Examples:
58 # Print flat profile with annotated disassembly for the top 10
59 # symbols. Use default log names and include the snapshot log.
60 $ %prog --snapshot --disasm-top=10
61
62 # Print flat profile with annotated disassembly for all used symbols.
63 # Use default log names and include kernel symbols in the analysis.
64 $ %prog --disasm-all --kernel
65
66 # Print flat profile. Use custom log names.
67 $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
68 """
69
70
71 # Must match kGcFakeMmap.
72 V8_GC_FAKE_MMAP = "/tmp/__v8_gc__"
73
74 JS_ORIGIN = "js"
75 JS_SNAPSHOT_ORIGIN = "js-snapshot"
76
77 # Avoid using the slow (google-specific) wrapper around objdump.
78 OBJDUMP_BIN = "/usr/bin/objdump"
79 if not os.path.exists(OBJDUMP_BIN):
80 OBJDUMP_BIN = "objdump"
81
82
83 class Code(object):
84 """Code object."""
85
86 _COMMON_DISASM_OPTIONS = ["-M", "intel-mnemonic", "-C"]
87
88 _DISASM_HEADER_RE = re.compile(r"[a-f0-9]+\s+<.*:$")
89 _DISASM_LINE_RE = re.compile(r"\s*([a-f0-9]+):.*")
90
91 # Keys must match constants in Logger::LogCodeInfo.
92 _ARCH_MAP = {
93 "ia32": "-m i386",
94 "x64": "-m i386 -M x86-64",
95 "arm": "-m arm" # Not supported by our objdump build.
96 }
97
98 _id = 0
99
100 def __init__(self, name, start_address, end_address, origin, origin_offset):
101 self.id = Code._id
102 Code._id += 1
103 self.name = name
104 self.other_names = None
105 self.start_address = start_address
106 self.end_address = end_address
107 self.origin = origin
108 self.origin_offset = origin_offset
109 self.self_ticks = 0
110 self.self_ticks_map = None
111 self.callee_ticks = None
112
113 def AddName(self, name):
114 assert self.name != name
115 if self.other_names is None:
116 self.other_names = [name]
117 return
118 if not name in self.other_names:
119 self.other_names.append(name)
120
121 def FullName(self):
122 if self.other_names is None:
123 return self.name
124 self.other_names.sort()
125 return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
126
127 def IsUsed(self):
128 return self.self_ticks > 0 or self.callee_ticks is not None
129
130 def Tick(self, pc):
131 self.self_ticks += 1
132 if self.self_ticks_map is None:
133 self.self_ticks_map = collections.defaultdict(lambda: 0)
134 offset = pc - self.start_address
135 self.self_ticks_map[offset] += 1
136
137 def CalleeTick(self, callee):
138 if self.callee_ticks is None:
139 self.callee_ticks = collections.defaultdict(lambda: 0)
140 self.callee_ticks[callee] += 1
141
142 def PrintAnnotated(self, code_info, options):
143 if self.self_ticks_map is None:
144 ticks_map = []
145 else:
146 ticks_map = self.self_ticks_map.items()
147 # Convert the ticks map to offsets and counts arrays so that later
148 # we can do binary search in the offsets array.
149 ticks_map.sort(key=lambda t: t[0])
150 ticks_offsets = [t[0] for t in ticks_map]
151 ticks_counts = [t[1] for t in ticks_map]
152 # Get a list of disassembled lines and their addresses.
153 lines = []
154 for line in self._GetDisasmLines(code_info, options):
155 match = Code._DISASM_LINE_RE.match(line)
156 if match:
157 line_address = int(match.group(1), 16)
158 lines.append((line_address, line))
159 if len(lines) == 0:
160 return
161 # Print annotated lines.
162 address = lines[0][0]
163 total_count = 0
164 for i in xrange(len(lines)):
165 start_offset = lines[i][0] - address
166 if i == len(lines) - 1:
167 end_offset = self.end_address - self.start_address
168 else:
169 end_offset = lines[i + 1][0] - address
170 # Ticks (reported pc values) are not always precise, i.e. they do
171 # not necessarily point at instruction starts. So we have to search
172 # for ticks that touch the current instruction line.
173 j = bisect.bisect_left(ticks_offsets, end_offset)
174 count = 0
175 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
176 if offset < start_offset:
177 break
178 count += cnt
179 total_count += count
180 count = 100.0 * count / self.self_ticks
181 if count >= 0.01:
182 print "%15.2f %s" % (count, lines[i][1])
183 else:
184 print "%s %s" % (" " * 15, lines[i][1])
185 print
186 assert total_count == self.self_ticks, \
187 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
188
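  # Illustrative sketch (not part of the reviewed patch) of the tick
  # attribution done in PrintAnnotated above: with instruction lines at
  # offsets [0, 4, 9] and ticks {3: 2, 5: 1}, bisect_left selects the ticks
  # whose offsets fall in [0, 4) and [4, 9), so the two ticks at offset 3
  # are charged to the first instruction and the tick at offset 5 to the
  # second, even though neither offset is an instruction start.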
189 def __str__(self):
190 return "%s [0x%x, 0x%x) size: %d origin: %s" % (
191 self.name,
192 self.start_address,
193 self.end_address,
194 self.end_address - self.start_address,
195 self.origin)
196
197 def _GetDisasmLines(self, code_info, options):
198 tmp_name = None
199 if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
200 assert code_info.arch in Code._ARCH_MAP, \
201 "Unsupported architecture '%s'" % code_info.arch
202 arch_flags = Code._ARCH_MAP[code_info.arch]
203 # Create a temporary file just with this code object.
204 tmp_name = tempfile.mktemp(".v8code")
205 size = self.end_address - self.start_address
206 command = "dd if=%s.code of=%s bs=1 count=%d skip=%d && " \
207 "%s %s -D -b binary %s %s" % (
208 options.log, tmp_name, size, self.origin_offset,
209 OBJDUMP_BIN, ' '.join(Code._COMMON_DISASM_OPTIONS), arch_flags,
210 tmp_name)
211 else:
212 command = "%s %s --start-address=%d --stop-address=%d -d %s " % (
213 OBJDUMP_BIN, ' '.join(Code._COMMON_DISASM_OPTIONS),
214 self.origin_offset,
215 self.origin_offset + self.end_address - self.start_address,
216 self.origin)
217 process = subprocess.Popen(command,
218 shell=True,
219 stdout=subprocess.PIPE,
220 stderr=subprocess.STDOUT)
221 out, err = process.communicate()
222 lines = out.split("\n")
223 header_line = 0
224 for i, line in enumerate(lines):
225 if Code._DISASM_HEADER_RE.match(line):
226 header_line = i
227 break
228 if tmp_name:
229 os.unlink(tmp_name)
230 return lines[header_line + 1:]
231
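# For a JS code object, the command built in Code._GetDisasmLines above
# expands to something of the form (values are illustrative only):
#   dd if=v8.log.code of=/tmp/tmpXXXX.v8code bs=1 count=512 skip=4096 && \
#     /usr/bin/objdump -M intel-mnemonic -C -D -b binary -m i386 -M x86-64 \
#     /tmp/tmpXXXX.v8code
# i.e. the raw machine code is carved out of the <log>.code dump and fed
# to objdump as a flat binary.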
232
233 class CodePage(object):
234 """Group of adjacent code objects."""
235
236 SHIFT = 12 # 4K pages
237 SIZE = (1 << SHIFT)
238 MASK = ~(SIZE - 1)
239
240 @staticmethod
241 def PageAddress(address):
242 return address & CodePage.MASK
243
244 @staticmethod
245 def PageId(address):
246 return address >> CodePage.SHIFT
247
248 @staticmethod
249 def PageAddressFromId(id):
250 return id << CodePage.SHIFT
251
252 def __init__(self, address):
253 self.address = address
254 self.code_objects = []
255
256 def Add(self, code):
257 self.code_objects.append(code)
258
259 def Remove(self, code):
260 self.code_objects.remove(code)
261
262 def Find(self, pc):
263 code_objects = self.code_objects
264 for i, code in enumerate(code_objects):
265 if code.start_address <= pc < code.end_address:
266 code_objects[0], code_objects[i] = code, code_objects[0]
267 return code
268 return None
269
270 def __iter__(self):
271 return self.code_objects.__iter__()
272
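# A quick sketch (not in the original patch) of the 4K-page arithmetic
# implemented by CodePage above:
#   CodePage.PageAddress(0x7f1234) == 0x7f1000  # rounded down to page start
#   CodePage.PageId(0x7f1234)      == 0x7f1     # page index
#   CodePage.PageAddressFromId(0x7f1) == 0x7f1000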
273
274 class CodeMap(object):
275 """Code object map."""
276
277 def __init__(self):
278 self.pages = {}
279 self.min_address = 1 << 64
280 self.max_address = -1
281
282 def Add(self, code, max_pages=-1):
283 page_id = CodePage.PageId(code.start_address)
284 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
285 pages = 0
286 while page_id < limit_id:
287 if max_pages >= 0 and pages > max_pages:
288 print >>sys.stderr, \
289 "Warning: page limit (%d) reached for %s [%s]" % (
290 max_pages, code.name, code.origin)
291 break
292 if page_id in self.pages:
293 page = self.pages[page_id]
294 else:
295 page = CodePage(CodePage.PageAddressFromId(page_id))
296 self.pages[page_id] = page
297 page.Add(code)
298 page_id += 1
299 pages += 1
300 self.min_address = min(self.min_address, code.start_address)
301 self.max_address = max(self.max_address, code.end_address)
302
303 def Remove(self, code):
304 page_id = CodePage.PageId(code.start_address)
305 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
306 removed = False
307 while page_id < limit_id:
308 if page_id not in self.pages:
309 page_id += 1
310 continue
311 page = self.pages[page_id]
312 page.Remove(code)
313 removed = True
314 page_id += 1
315 return removed
316
317 def AllCode(self):
318 for page in self.pages.itervalues():
319 for code in page:
320 if CodePage.PageAddress(code.start_address) == page.address:
321 yield code
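    # Note: a code object spanning several pages is registered in each of
    # them; the page-address check above ensures it is yielded only once,
    # from the page that contains its start address.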
322
323 def UsedCode(self):
324 for code in self.AllCode():
325 if code.IsUsed():
326 yield code
327
328 def Print(self):
329 for code in self.AllCode():
330 print code
331
332 def Find(self, pc):
333 if pc < self.min_address or pc >= self.max_address:
334 return None
335 page_id = CodePage.PageId(pc)
336 if page_id not in self.pages:
337 return None
338 return self.pages[page_id].Find(pc)
339
340
341 class CodeInfo(object):
342 """Generic info about generated code objects."""
343
344 def __init__(self, arch, header_size):
345 self.arch = arch
346 self.header_size = header_size
347
348
349 class CodeLogReader(object):
350 """V8 code event log reader."""
351
352 _CODE_INFO_RE = re.compile(
353 r"code-info,([^,]+),(\d+)")
354
355 _CODE_CREATE_RE = re.compile(
356 r"code-creation,([^,]+),(0x[a-f0-9]+),(\d+),\"([^\"]*)\"(?:,(\d+))?")
357
358 _CODE_MOVE_RE = re.compile(
359 r"code-move,(0x[a-f0-9]+),(0x[a-f0-9]+)")
360
361 _CODE_DELETE_RE = re.compile(
362 r"code-delete,(0x[a-f0-9]+)")
363
364 _SNAPSHOT_POS_RE = re.compile(
365 r"snapshot-pos,(0x[a-f0-9]+),(\d+)")
366
367 _CODE_MOVING_GC = "code-moving-gc"
368
369 def __init__(self, log_name, code_map, is_snapshot, snapshot_pos_to_name):
370 self.log = open(log_name, "r")
371 self.code_map = code_map
372 self.is_snapshot = is_snapshot
373 self.snapshot_pos_to_name = snapshot_pos_to_name
374 self.address_to_snapshot_name = {}
375
376 def ReadCodeInfo(self):
377 line = self.log.readline() or ""
378 match = CodeLogReader._CODE_INFO_RE.match(line)
379 assert match, "No code info in log"
380 return CodeInfo(arch=match.group(1), header_size=int(match.group(2)))
381
382 def ReadUpToGC(self, code_info):
383 made_progress = False
384 code_header_size = code_info.header_size
385 while True:
386 line = self.log.readline()
387 if not line:
388 return made_progress
389 made_progress = True
390
391 if line.startswith(CodeLogReader._CODE_MOVING_GC):
392 self.address_to_snapshot_name.clear()
393 return made_progress
394
395 match = CodeLogReader._CODE_CREATE_RE.match(line)
396 if match:
397 start_address = int(match.group(2), 16) + code_header_size
398 end_address = start_address + int(match.group(3)) - code_header_size
399 if start_address in self.address_to_snapshot_name:
400 name = self.address_to_snapshot_name[start_address]
401 origin = JS_SNAPSHOT_ORIGIN
402 else:
403 name = "%s:%s" % (match.group(1), match.group(4))
404 origin = JS_ORIGIN
405 if self.is_snapshot:
406 origin_offset = 0
407 else:
408 origin_offset = int(match.group(5))
409 code = Code(name, start_address, end_address, origin, origin_offset)
410 conflicting_code = self.code_map.Find(start_address)
411 if conflicting_code:
412 CodeLogReader._HandleCodeConflict(conflicting_code, code)
413 # TODO(vitalyr): this warning is too noisy because of our
414 # attempts to reconstruct code log from the snapshot.
415 # print >>sys.stderr, \
416 # "Warning: Skipping duplicate code log entry %s" % code
417 continue
418 self.code_map.Add(code)
419 continue
420
421 match = CodeLogReader._CODE_MOVE_RE.match(line)
422 if match:
423 old_start_address = int(match.group(1), 16) + code_header_size
424 new_start_address = int(match.group(2), 16) + code_header_size
425 if old_start_address == new_start_address:
426 # Skip useless code move entries.
427 continue
428 code = self.code_map.Find(old_start_address)
429 if not code:
430 print >>sys.stderr, "Warning: Not found %x" % old_start_address
431 continue
432 assert code.start_address == old_start_address, \
433 "Inexact move address %x for %s" % (old_start_address, code)
434 self.code_map.Remove(code)
435 size = code.end_address - code.start_address
436 code.start_address = new_start_address
437 code.end_address = new_start_address + size
438 self.code_map.Add(code)
439 continue
440
441 match = CodeLogReader._CODE_DELETE_RE.match(line)
442 if match:
443 old_start_address = int(match.group(1), 16) + code_header_size
444 code = self.code_map.Find(old_start_address)
445 if not code:
446 print >>sys.stderr, "Warning: Not found %x" % old_start_address
447 continue
448 assert code.start_address == old_start_address, \
449 "Inexact delete address %x for %s" % (old_start_address, code)
450 self.code_map.Remove(code)
451 continue
452
453 match = CodeLogReader._SNAPSHOT_POS_RE.match(line)
454 if match:
455 start_address = int(match.group(1), 16) + code_header_size
456 snapshot_pos = int(match.group(2))
457 if self.is_snapshot:
458 code = self.code_map.Find(start_address)
459 if code:
460 assert code.start_address == start_address, \
461 "Inexact snapshot address %x for %s" % (start_address, code)
462 self.snapshot_pos_to_name[snapshot_pos] = code.name
463 else:
464 if snapshot_pos in self.snapshot_pos_to_name:
465 self.address_to_snapshot_name[start_address] = \
466 self.snapshot_pos_to_name[snapshot_pos]
467
468 def Dispose(self):
469 self.log.close()
470
471 @staticmethod
472 def _HandleCodeConflict(old_code, new_code):
473 assert (old_code.start_address == new_code.start_address and
474 old_code.end_address == new_code.end_address), \
475 "Conflicting code log entries %s and %s" % (old_code, new_code)
476 CodeLogReader._UpdateNames(old_code, new_code)
477
478 @staticmethod
479 def _UpdateNames(old_code, new_code):
480 if old_code.name == new_code.name:
481 return
482 # Kludge: there are code objects with custom names that don't
483 # match their flags.
484 misnamed_code = set(["Builtin:CpuFeatures::Probe"])
485 if old_code.name in misnamed_code:
486 return
487 # Code object may be shared by a few functions. Collect the full
488 # set of names.
489 old_code.AddName(new_code.name)
490
491
492 class Descriptor(object):
493 """Descriptor of a structure in the binary trace log."""
494
495 CTYPE_MAP = {
496 "u16": ctypes.c_uint16,
497 "u32": ctypes.c_uint32,
498 "u64": ctypes.c_uint64
499 }
500
501 def __init__(self, fields):
502 class TraceItem(ctypes.Structure):
503 _fields_ = Descriptor.CtypesFields(fields)
504
505 def __str__(self):
506 return ", ".join("%s: %s" % (field, self.__getattribute__(field))
507 for field, _ in TraceItem._fields_)
508
509 self.ctype = TraceItem
510
511 def Read(self, trace, offset):
512 return self.ctype.from_buffer(trace, offset)
513
514 @staticmethod
515 def CtypesFields(fields):
516 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
517
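# A minimal usage sketch of Descriptor (illustrative, not in the patch):
#   desc = Descriptor([("pid", "u32"), ("time", "u64")])
#   item = desc.Read(bytearray(16), 0)  # any writable buffer works
#   print item                          # "pid: 0, time: 0"
# The perf trace structures below are all declared this way and then
# decoded in place from the mmap-ed trace file.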
518
519 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
520 # for the gory details.
521
522
523 TRACE_HEADER_DESC = Descriptor([
524 ("magic", "u64"),
525 ("size", "u64"),
526 ("attr_size", "u64"),
527 ("attrs_offset", "u64"),
528 ("attrs_size", "u64"),
529 ("data_offset", "u64"),
530 ("data_size", "u64"),
531 ("event_types_offset", "u64"),
532 ("event_types_size", "u64")
533 ])
534
535
536 PERF_EVENT_ATTR_DESC = Descriptor([
537 ("type", "u32"),
538 ("size", "u32"),
539 ("config", "u64"),
540 ("sample_period_or_freq", "u64"),
541 ("sample_type", "u64"),
542 ("read_format", "u64"),
543 ("flags", "u64"),
544 ("wakeup_events_or_watermark", "u32"),
545 ("bp_type", "u32"),
546 ("bp_addr", "u64"),
547 ("bp_len", "u64"),
548 ])
549
550
551 PERF_EVENT_HEADER_DESC = Descriptor([
552 ("type", "u32"),
553 ("misc", "u16"),
554 ("size", "u16")
555 ])
556
557
558 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
559 ("pid", "u32"),
560 ("tid", "u32"),
561 ("addr", "u64"),
562 ("len", "u64"),
563 ("pgoff", "u64")
564 ])
565
566
567 # perf_event_attr.sample_type bits control the set of
568 # perf_sample_event fields.
569 PERF_SAMPLE_IP = 1 << 0
570 PERF_SAMPLE_TID = 1 << 1
571 PERF_SAMPLE_TIME = 1 << 2
572 PERF_SAMPLE_ADDR = 1 << 3
573 PERF_SAMPLE_READ = 1 << 4
574 PERF_SAMPLE_CALLCHAIN = 1 << 5
575 PERF_SAMPLE_ID = 1 << 6
576 PERF_SAMPLE_CPU = 1 << 7
577 PERF_SAMPLE_PERIOD = 1 << 8
578 PERF_SAMPLE_STREAM_ID = 1 << 9
579 PERF_SAMPLE_RAW = 1 << 10
580
581
582 PERF_SAMPLE_EVENT_BODY_FIELDS = [
583 ("ip", "u64", PERF_SAMPLE_IP),
584 ("pid", "u32", PERF_SAMPLE_TID),
585 ("tid", "u32", PERF_SAMPLE_TID),
586 ("time", "u64", PERF_SAMPLE_TIME),
587 ("addr", "u64", PERF_SAMPLE_ADDR),
588 ("id", "u64", PERF_SAMPLE_ID),
589 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
590 ("cpu", "u32", PERF_SAMPLE_CPU),
591 ("res", "u32", PERF_SAMPLE_CPU),
592 ("period", "u64", PERF_SAMPLE_PERIOD),
593 # The read format, which comes after the period and before the
594 # callchain, has a variable size and is not handled here.
595 ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
596 # Raw data follows the callchain and is ignored.
597 ]
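# For example (sketch only): a trace recorded with
# sample_type == PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD
# yields sample bodies with the fields ip, pid, tid, time and period, in
# that order; TraceReader._SampleEventBodyDesc below builds exactly that
# Descriptor by filtering the table above with the sample_type mask.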
598
599
600 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
601
602
603 PERF_RECORD_MMAP = 1
604 PERF_RECORD_SAMPLE = 9
605
606
607 class TraceReader(object):
608 """Perf (linux-2.6/tools/perf) trace file reader."""
609
610 _TRACE_HEADER_MAGIC = 4993446653023372624
611
612 def __init__(self, trace_name):
613 self.trace_file = open(trace_name, "r")
614 self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
615 self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
616 if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
617 print >>sys.stderr, "Warning: unsupported trace header magic"
618 self.offset = self.trace_header.data_offset
619 self.limit = self.trace_header.data_offset + self.trace_header.data_size
620 assert self.limit <= self.trace.size(), \
621 "Trace data limit exceeds trace file size"
622 self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
623 assert self.trace_header.attrs_size != 0, \
624 "No perf event attributes found in the trace"
625 perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
626 self.trace_header.attrs_offset)
627 self.sample_event_body_desc = self._SampleEventBodyDesc(
628 perf_event_attr.sample_type)
629 self.callchain_supported = \
630 (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
631 if self.callchain_supported:
632 self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
633 self.ip_size = ctypes.sizeof(self.ip_struct)
634
635 def ReadEventHeader(self):
636 if self.offset >= self.limit:
637 return None, 0
638 offset = self.offset
639 header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
640 self.offset += header.size
641 return header, offset
642
643 def ReadMmap(self, header, offset):
644 mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
645 offset + self.header_size)
646 # Read null-padded filename.
647 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
648 offset + header.size].rstrip(chr(0))
649 mmap_info.filename = filename
650 return mmap_info
651
652 def ReadSample(self, header, offset):
653 sample = self.sample_event_body_desc.Read(self.trace,
654 offset + self.header_size)
655 if not self.callchain_supported:
656 return sample
657 sample.ips = []
658 offset += self.header_size + ctypes.sizeof(sample)
659 for _ in xrange(sample.nr):
660 sample.ips.append(
661 self.ip_struct.from_buffer(self.trace, offset).value)
662 offset += self.ip_size
663 return sample
664
665 def Dispose(self):
666 self.trace.close()
667 self.trace_file.close()
668
669 def _SampleEventBodyDesc(self, sample_type):
670 assert (sample_type & PERF_SAMPLE_READ) == 0, \
671 "Can't handle read format in samples"
672 fields = [(field, format)
673 for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
674 if (bit & sample_type) != 0]
675 return Descriptor(fields)
676
677
678 OBJDUMP_SECTION_HEADER_RE = re.compile(
679 r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
680 OBJDUMP_SYMBOL_LINE_RE = re.compile(
681 r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
682 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
683 r"^DYNAMIC SYMBOL TABLE")
684 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
685 PERF_KERNEL_ALLSYMS_RE = re.compile(
686 r".*kallsyms.*")
687 KERNEL_ALLSYMS_LINE_RE = re.compile(
688 r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
689
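# For reference, the inputs these regexps are matched against look like
# (examples only):
#   objdump -t symbol line:  "0000000000401020 g     F .text  000000000000002a  main"
#   /proc/kallsyms line:     "ffffffff810b25a0 T do_sys_open"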
690
691 class LibraryRepo(object):
692 def __init__(self):
693 self.infos = []
694 self.names = set()
695 self.ticks = {}
696
697 def Load(self, mmap_info, code_map, options):
698 # Kernel mmaps can be recognized by their tid being 0; skip them
699 # unless kernel symbols were requested.
700 if mmap_info.tid == 0 and not options.kernel:
701 return True
702 if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
703 return self._LoadKernelSymbols(code_map)
704 self.infos.append(mmap_info)
705 mmap_info.ticks = 0
706 mmap_info.unique_name = self._UniqueMmapName(mmap_info)
707 if not os.path.exists(mmap_info.filename):
708 return True
709 # Request section headers (-h), symbols (-t), and dynamic symbols
710 # (-T) from objdump.
711 # Unfortunately, section headers span two lines, so we have to
712 # remember the most recently seen section name (from the first line
713 # of each section header) in the after_section variable.
714 process = subprocess.Popen(
715 "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
716 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
717 pipe = process.stdout
718 after_section = None
719 code_sections = set()
720 reloc_sections = set()
721 dynamic = False
722 try:
723 for line in pipe:
724 if after_section:
725 if line.find("CODE") != -1:
726 code_sections.add(after_section)
727 if line.find("RELOC") != -1:
728 reloc_sections.add(after_section)
729 after_section = None
730 continue
731
732 match = OBJDUMP_SECTION_HEADER_RE.match(line)
733 if match:
734 after_section = match.group(1)
735 continue
736
737 if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
738 dynamic = True
739 continue
740
741 match = OBJDUMP_SYMBOL_LINE_RE.match(line)
742 if match:
743 start_address = int(match.group(1), 16)
744 origin_offset = start_address
745 flags = match.group(2)
746 section = match.group(3)
747 if section in code_sections:
748 if dynamic or section in reloc_sections:
749 start_address += mmap_info.addr
750 size = int(match.group(4), 16)
751 name = match.group(5)
752 origin = mmap_info.filename
753 code_map.Add(Code(name, start_address, start_address + size,
754 origin, origin_offset))
755 finally:
756 pipe.close()
757 assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
758
759 def Tick(self, pc):
760 for i, mmap_info in enumerate(self.infos):
761 if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
762 mmap_info.ticks += 1
763 self.infos[0], self.infos[i] = mmap_info, self.infos[0]
764 return True
765 return False
766
767 def _UniqueMmapName(self, mmap_info):
768 name = mmap_info.filename
769 index = 1
770 while name in self.names:
771 name = "%s-%d" % (mmap_info.filename, index)
772 index += 1
773 self.names.add(name)
774 return name
775
776 def _LoadKernelSymbols(self, code_map):
777 if not os.path.exists(KERNEL_ALLSYMS_FILE):
778 print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
779 return False
780 kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
781 code = None
782 for line in kallsyms:
783 match = KERNEL_ALLSYMS_LINE_RE.match(line)
784 if match:
785 start_address = int(match.group(1), 16)
786 end_address = start_address
787 name = match.group(2)
788 if code:
789 code.end_address = start_address
790 code_map.Add(code, 16)
791 code = Code(name, start_address, end_address, "kernel", 0)
792 return True
793
794
795 def PrintReport(code_map, library_repo, code_info, options):
796 print "Ticks per symbol:"
797 used_code = [code for code in code_map.UsedCode()]
798 used_code.sort(key=lambda x: x.self_ticks, reverse=True)
799 for i, code in enumerate(used_code):
800 print "%10d %s [%s]" % (code.self_ticks, code.FullName(), code.origin)
801 if options.disasm_all or i < options.disasm_top:
802 code.PrintAnnotated(code_info, options)
803 print
804 print "Ticks per library:"
805 mmap_infos = [m for m in library_repo.infos]
806 mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
807 for mmap_info in mmap_infos:
808 print "%10d %s" % (mmap_info.ticks, mmap_info.unique_name)
809
810
811 def PrintDot(code_map, options):
812 print "digraph G {"
813 for code in code_map.UsedCode():
814 if code.self_ticks < 10:
815 continue
816 print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
817 if code.callee_ticks:
818 for callee, ticks in code.callee_ticks.iteritems():
819 print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
820 print "}"
821
822
823 if __name__ == "__main__":
824 parser = optparse.OptionParser(USAGE)
825 parser.add_option("--snapshot-log",
826 default="obj/release/snapshot.log",
827 help="V8 snapshot log file name [default: %default]")
828 parser.add_option("--log",
829 default="v8.log",
830 help="V8 log file name [default: %default]")
831 parser.add_option("--snapshot",
832 default=False,
833 action="store_true",
834 help="process V8 snapshot log [default: %default]")
835 parser.add_option("--trace",
836 default="perf.data",
837 help="perf trace file name [default: %default]")
838 parser.add_option("--kernel",
839 default=False,
840 action="store_true",
841 help="process kernel entries [default: %default]")
842 parser.add_option("--disasm-top",
843 default=0,
844 type="int",
845 help=("number of top symbols to disassemble and annotate "
846 "[default: %default]"))
847 parser.add_option("--disasm-all",
848 default=False,
849 action="store_true",
850 help=("disassemble and annotate all used symbols "
851 "[default: %default]"))
852 parser.add_option("--dot",
853 default=False,
854 action="store_true",
855 help="produce dot output (WIP) [default: %default]")
856 parser.add_option("--quiet", "-q",
857 default=False,
858 action="store_true",
859 help="no auxiliary messages [default: %default]")
860 options, args = parser.parse_args()
861
862 if not options.quiet:
863 if options.snapshot:
864 print "V8 logs: %s, %s, %s.code" % (options.snapshot_log,
865 options.log,
866 options.log)
867 else:
868 print "V8 log: %s, %s.code (no snapshot)" % (options.log, options.log)
869 print "Perf trace file: %s" % options.trace
870
871 # Stats.
872 events = 0
873 ticks = 0
874 missed_ticks = 0
875 really_missed_ticks = 0
876 mmap_time = 0
877 sample_time = 0
878
879 # Initialize the log reader and get the code info.
880 code_map = CodeMap()
881 snapshot_name_map = {}
882 log_reader = CodeLogReader(log_name=options.log,
883 code_map=code_map,
884 is_snapshot=False,
885 snapshot_pos_to_name=snapshot_name_map)
886 code_info = log_reader.ReadCodeInfo()
887 if not options.quiet:
888 print "Generated code architecture: %s" % code_info.arch
889 print
890
891 # Process the snapshot log to fill the snapshot name map.
892 if options.snapshot:
893 snapshot_log_reader = CodeLogReader(log_name=options.snapshot_log,
894 code_map=CodeMap(),
895 is_snapshot=True,
896 snapshot_pos_to_name=snapshot_name_map)
897 while snapshot_log_reader.ReadUpToGC(code_info):
898 pass
899
900 # Process the code and trace logs.
901 library_repo = LibraryRepo()
902 log_reader.ReadUpToGC(code_info)
903 trace_reader = TraceReader(options.trace)
904 while True:
905 header, offset = trace_reader.ReadEventHeader()
906 if not header:
907 break
908 events += 1
909 if header.type == PERF_RECORD_MMAP:
910 start = time.time()
911 mmap_info = trace_reader.ReadMmap(header, offset)
912 if mmap_info.filename == V8_GC_FAKE_MMAP:
913 log_reader.ReadUpToGC(code_info)
914 else:
915 library_repo.Load(mmap_info, code_map, options)
916 mmap_time += time.time() - start
917 elif header.type == PERF_RECORD_SAMPLE:
918 ticks += 1
919 start = time.time()
920 sample = trace_reader.ReadSample(header, offset)
921 code = code_map.Find(sample.ip)
922 if code:
923 code.Tick(sample.ip)
924 else:
925 missed_ticks += 1
926 if not library_repo.Tick(sample.ip) and not code:
927 really_missed_ticks += 1
928 if trace_reader.callchain_supported:
929 for ip in sample.ips:
930 caller_code = code_map.Find(ip)
931 if caller_code:
932 if code:
933 caller_code.CalleeTick(code)
934 code = caller_code
935 sample_time += time.time() - start
936
937 if options.dot:
938 PrintDot(code_map, options)
939 else:
940 PrintReport(code_map, library_repo, code_info, options)
941
942 if not options.quiet:
943 print
944 print "Stats:"
945 print "%10d total trace events" % events
946 print "%10d total ticks" % ticks
947 print "%10d ticks not in symbols" % missed_ticks
948 print "%10d unaccounted ticks" % really_missed_ticks
949 print "%10d total symbols" % len([c for c in code_map.AllCode()])
950 print "%10d used symbols" % len([c for c in code_map.UsedCode()])
951 print "%9.2fs library processing time" % mmap_time
952 print "%9.2fs tick processing time" % sample_time
953
954 log_reader.Dispose()
955 trace_reader.Dispose()