OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # |
| 3 # Copyright 2010 the V8 project authors. All rights reserved. |
| 4 # Redistribution and use in source and binary forms, with or without |
| 5 # modification, are permitted provided that the following conditions are |
| 6 # met: |
| 7 # |
| 8 # * Redistributions of source code must retain the above copyright |
| 9 # notice, this list of conditions and the following disclaimer. |
| 10 # * Redistributions in binary form must reproduce the above |
| 11 # copyright notice, this list of conditions and the following |
| 12 # disclaimer in the documentation and/or other materials provided |
| 13 # with the distribution. |
| 14 # * Neither the name of Google Inc. nor the names of its |
| 15 # contributors may be used to endorse or promote products derived |
| 16 # from this software without specific prior written permission. |
| 17 # |
| 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 |
| 30 import bisect |
| 31 import collections |
| 32 import ctypes |
| 33 import mmap |
| 34 import optparse |
| 35 import os |
| 36 import re |
| 37 import subprocess |
| 38 import sys |
| 39 import tempfile |
| 40 import time |
| 41 |
| 42 |
| 43 USAGE="""usage: %prog [OPTION]... |
| 44 |
| 45 Analyses V8 and perf logs to produce profiles. |
| 46 |
| 47 Perf logs can be collected using a command like: |
| 48 $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof |
| 49 # -R: collect all data |
| 50 # -e cycles: use cpu-cycles event (run "perf list" for details) |
| 51 # -c 10000: write a sample after each 10000 events |
| 52 # -f: force output file overwrite |
| 53 # -i: limit profiling to our process and the kernel |
| 54 # --ll-prof shell flag enables the right V8 logs |
| 55 This will produce a binary trace file (perf.data) that %prog can analyse. |
| 56 |
| 57 Examples: |
| 58 # Print flat profile with annotated disassembly for the 10 top |
| 59 # symbols. Use default log names and include the snapshot log. |
| 60 $ %prog --snapshot --disasm-top=10 |
| 61 |
| 62 # Print flat profile with annotated disassembly for all used symbols. |
| 63 # Use default log names and include kernel symbols into analysis. |
| 64 $ %prog --disasm-all --kernel |
| 65 |
| 66 # Print flat profile. Use custom log names. |
| 67 $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot |
| 68 """ |
| 69 |
| 70 |
| 71 # Must match kGcFakeMmap. |
| 72 V8_GC_FAKE_MMAP = "/tmp/__v8_gc__" |
| 73 |
| 74 JS_ORIGIN = "js" |
| 75 JS_SNAPSHOT_ORIGIN = "js-snapshot" |
| 76 |
| 77 # Avoid using the slow (google-specific) wrapper around objdump. |
| 78 OBJDUMP_BIN = "/usr/bin/objdump" |
| 79 if not os.path.exists(OBJDUMP_BIN): |
| 80 OBJDUMP_BIN = "objdump" |
| 81 |
| 82 |
| 83 class Code(object): |
| 84 """Code object.""" |
| 85 |
| 86 _COMMON_DISASM_OPTIONS = ["-M", "intel-mnemonic", "-C"] |
| 87 |
| 88 _DISASM_HEADER_RE = re.compile(r"[a-f0-9]+\s+<.*:$") |
| 89 _DISASM_LINE_RE = re.compile(r"\s*([a-f0-9]+):.*") |
| 90 |
| 91 # Keys must match constants in Logger::LogCodeInfo. |
| 92 _ARCH_MAP = { |
| 93 "ia32": "-m i386", |
| 94 "x64": "-m i386 -M x86-64", |
| 95 "arm": "-m arm" # Not supported by our objdump build. |
| 96 } |
| 97 |
| 98 _id = 0 |
| 99 |
| 100 def __init__(self, name, start_address, end_address, origin, origin_offset): |
| 101 self.id = Code._id |
| 102 Code._id += 1 |
| 103 self.name = name |
| 104 self.other_names = None |
| 105 self.start_address = start_address |
| 106 self.end_address = end_address |
| 107 self.origin = origin |
| 108 self.origin_offset = origin_offset |
| 109 self.self_ticks = 0 |
| 110 self.self_ticks_map = None |
| 111 self.callee_ticks = None |
| 112 |
| 113 def AddName(self, name): |
| 114 assert self.name != name |
| 115 if self.other_names is None: |
| 116 self.other_names = [name] |
| 117 return |
| 118 if not name in self.other_names: |
| 119 self.other_names.append(name) |
| 120 |
| 121 def FullName(self): |
| 122 if self.other_names is None: |
| 123 return self.name |
| 124 self.other_names.sort() |
| 125 return "%s (aka %s)" % (self.name, ", ".join(self.other_names)) |
| 126 |
| 127 def IsUsed(self): |
| 128 return self.self_ticks > 0 or self.callee_ticks is not None |
| 129 |
| 130 def Tick(self, pc): |
| 131 self.self_ticks += 1 |
| 132 if self.self_ticks_map is None: |
| 133 self.self_ticks_map = collections.defaultdict(lambda: 0) |
| 134 offset = pc - self.start_address |
| 135 self.self_ticks_map[offset] += 1 |
| 136 |
| 137 def CalleeTick(self, callee): |
| 138 if self.callee_ticks is None: |
| 139 self.callee_ticks = collections.defaultdict(lambda: 0) |
| 140 self.callee_ticks[callee] += 1 |
| 141 |
| 142 def PrintAnnotated(self, code_info, options): |
| 143 if self.self_ticks_map is None: |
| 144 ticks_map = [] |
| 145 else: |
| 146 ticks_map = self.self_ticks_map.items() |
| 147 # Convert the ticks map to offsets and counts arrays so that later |
| 148 # we can do binary search in the offsets array. |
| 149 ticks_map.sort(key=lambda t: t[0]) |
| 150 ticks_offsets = [t[0] for t in ticks_map] |
| 151 ticks_counts = [t[1] for t in ticks_map] |
| 152 # Get a list of disassembled lines and their addresses. |
| 153 lines = [] |
| 154 for line in self._GetDisasmLines(code_info, options): |
| 155 match = Code._DISASM_LINE_RE.match(line) |
| 156 if match: |
| 157 line_address = int(match.group(1), 16) |
| 158 lines.append((line_address, line)) |
| 159 if len(lines) == 0: |
| 160 return |
| 161 # Print annotated lines. |
| 162 address = lines[0][0] |
| 163 total_count = 0 |
| 164 for i in xrange(len(lines)): |
| 165 start_offset = lines[i][0] - address |
| 166 if i == len(lines) - 1: |
| 167 end_offset = self.end_address - self.start_address |
| 168 else: |
| 169 end_offset = lines[i + 1][0] - address |
| 170 # Ticks (reported pc values) are not always precise, i.e. not |
| 171 # necessarily point at instruction starts. So we have to search |
| 172 # for ticks that touch the current instruction line. |
| 173 j = bisect.bisect_left(ticks_offsets, end_offset) |
| 174 count = 0 |
| 175 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])): |
| 176 if offset < start_offset: |
| 177 break |
| 178 count += cnt |
| 179 total_count += count |
| 180 count = 100.0 * count / self.self_ticks |
| 181 if count >= 0.01: |
| 182 print "%15.2f %s" % (count, lines[i][1]) |
| 183 else: |
| 184 print "%s %s" % (" " * 15, lines[i][1]) |
| 185 print |
| 186 assert total_count == self.self_ticks, \ |
| 187 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self) |
| 188 |
| 189 def __str__(self): |
| 190 return "%s [0x%x, 0x%x) size: %d origin: %s" % ( |
| 191 self.name, |
| 192 self.start_address, |
| 193 self.end_address, |
| 194 self.end_address - self.start_address, |
| 195 self.origin) |
| 196 |
| 197 def _GetDisasmLines(self, code_info, options): |
| 198 tmp_name = None |
| 199 if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN: |
| 200 assert code_info.arch in Code._ARCH_MAP, \ |
| 201 "Unsupported architecture '%s'" % arch |
| 202 arch_flags = Code._ARCH_MAP[code_info.arch] |
| 203 # Create a temporary file just with this code object. |
| 204 tmp_name = tempfile.mktemp(".v8code") |
| 205 size = self.end_address - self.start_address |
| 206 command = "dd if=%s.code of=%s bs=1 count=%d skip=%d && " \ |
| 207 "%s %s -D -b binary %s %s" % ( |
| 208 options.log, tmp_name, size, self.origin_offset, |
| 209 OBJDUMP_BIN, ' '.join(Code._COMMON_DISASM_OPTIONS), arch_flags, |
| 210 tmp_name) |
| 211 else: |
| 212 command = "%s %s --start-address=%d --stop-address=%d -d %s " % ( |
| 213 OBJDUMP_BIN, ' '.join(Code._COMMON_DISASM_OPTIONS), |
| 214 self.origin_offset, |
| 215 self.origin_offset + self.end_address - self.start_address, |
| 216 self.origin) |
| 217 process = subprocess.Popen(command, |
| 218 shell=True, |
| 219 stdout=subprocess.PIPE, |
| 220 stderr=subprocess.STDOUT) |
| 221 out, err = process.communicate() |
| 222 lines = out.split("\n") |
| 223 header_line = 0 |
| 224 for i, line in enumerate(lines): |
| 225 if Code._DISASM_HEADER_RE.match(line): |
| 226 header_line = i |
| 227 break |
| 228 if tmp_name: |
| 229 os.unlink(tmp_name) |
| 230 return lines[header_line + 1:] |
| 231 |
| 232 |
class CodePage(object):
  """Group of adjacent code objects sharing one 4K-aligned page range."""

  SHIFT = 12  # 4K pages
  SIZE = (1 << SHIFT)
  MASK = ~(SIZE - 1)

  @staticmethod
  def PageAddress(address):
    """Round address down to the start of its page."""
    return address & CodePage.MASK

  @staticmethod
  def PageId(address):
    """Return the page number containing address."""
    return address >> CodePage.SHIFT

  @staticmethod
  def PageAddressFromId(id):
    """Return the start address of the page with the given id."""
    return id << CodePage.SHIFT

  def __init__(self, address):
    self.address = address
    self.code_objects = []

  def Add(self, code):
    """Register a code object on this page."""
    self.code_objects.append(code)

  def Remove(self, code):
    """Unregister a code object from this page."""
    self.code_objects.remove(code)

  def Find(self, pc):
    """Return the code object covering pc, or None.

    A hit is swapped to the front of the list so repeated lookups of
    hot code stay cheap (move-to-front heuristic).
    """
    objects = self.code_objects
    for index, candidate in enumerate(objects):
      if candidate.start_address <= pc < candidate.end_address:
        objects[0], objects[index] = candidate, objects[0]
        return candidate
    return None

  def __iter__(self):
    return iter(self.code_objects)
| 272 |
| 273 |
| 274 class CodeMap(object): |
| 275 """Code object map.""" |
| 276 |
| 277 def __init__(self): |
| 278 self.pages = {} |
| 279 self.min_address = 1 << 64 |
| 280 self.max_address = -1 |
| 281 |
| 282 def Add(self, code, max_pages=-1): |
| 283 page_id = CodePage.PageId(code.start_address) |
| 284 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) |
| 285 pages = 0 |
| 286 while page_id < limit_id: |
| 287 if max_pages >= 0 and pages > max_pages: |
| 288 print >>sys.stderr, \ |
| 289 "Warning: page limit (%d) reached for %s [%s]" % ( |
| 290 max_pages, code.name, code.origin) |
| 291 break |
| 292 if page_id in self.pages: |
| 293 page = self.pages[page_id] |
| 294 else: |
| 295 page = CodePage(CodePage.PageAddressFromId(page_id)) |
| 296 self.pages[page_id] = page |
| 297 page.Add(code) |
| 298 page_id += 1 |
| 299 pages += 1 |
| 300 self.min_address = min(self.min_address, code.start_address) |
| 301 self.max_address = max(self.max_address, code.end_address) |
| 302 |
| 303 def Remove(self, code): |
| 304 page_id = CodePage.PageId(code.start_address) |
| 305 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) |
| 306 removed = False |
| 307 while page_id < limit_id: |
| 308 if page_id not in self.pages: |
| 309 page_id += 1 |
| 310 continue |
| 311 page = self.pages[page_id] |
| 312 page.Remove(code) |
| 313 removed = True |
| 314 page_id += 1 |
| 315 return removed |
| 316 |
| 317 def AllCode(self): |
| 318 for page in self.pages.itervalues(): |
| 319 for code in page: |
| 320 if CodePage.PageAddress(code.start_address) == page.address: |
| 321 yield code |
| 322 |
| 323 def UsedCode(self): |
| 324 for code in self.AllCode(): |
| 325 if code.IsUsed(): |
| 326 yield code |
| 327 |
| 328 def Print(self): |
| 329 for code in self.AllCode(): |
| 330 print code |
| 331 |
| 332 def Find(self, pc): |
| 333 if pc < self.min_address or pc >= self.max_address: |
| 334 return None |
| 335 page_id = CodePage.PageId(pc) |
| 336 if page_id not in self.pages: |
| 337 return None |
| 338 return self.pages[page_id].Find(pc) |
| 339 |
| 340 |
class CodeInfo(object):
  """Generic info about generated code objects."""

  def __init__(self, arch, header_size):
    # Architecture string as logged by V8 (e.g. "ia32", "x64", "arm").
    self.arch = arch
    # Size in bytes of the code object header preceding the instructions.
    self.header_size = header_size
| 348 |
class CodeLogReader(object):
  """V8 code event log reader.

  Replays code creation/move/delete events from the text log produced
  by --ll-prof, keeping the given CodeMap up to date.  In snapshot mode
  it records snapshot positions into snapshot_pos_to_name so a later,
  non-snapshot reader can name snapshot-originated code objects.
  """

  # "code-info,<arch>,<code header size>"; expected as the first line.
  _CODE_INFO_RE = re.compile(
    r"code-info,([^,]+),(\d+)")

  # "code-creation,<kind>,<address>,<size>,"<name>"[,<offset>]".
  _CODE_CREATE_RE = re.compile(
    r"code-creation,([^,]+),(0x[a-f0-9]+),(\d+),\"([^\"]*)\"(?:,(\d+))?")

  # "code-move,<old address>,<new address>".
  _CODE_MOVE_RE = re.compile(
    r"code-move,(0x[a-f0-9]+),(0x[a-f0-9]+)")

  # "code-delete,<address>".
  _CODE_DELETE_RE = re.compile(
    r"code-delete,(0x[a-f0-9]+)")

  # "snapshot-pos,<address>,<position in the snapshot>".
  _SNAPSHOT_POS_RE = re.compile(
    r"snapshot-pos,(0x[a-f0-9]+),(\d+)")

  # Marker line logged when a moving GC starts.
  _CODE_MOVING_GC = "code-moving-gc"

  def __init__(self, log_name, code_map, is_snapshot, snapshot_pos_to_name):
    # log_name: V8 log file to read.
    # code_map: CodeMap updated with the events that are read.
    # is_snapshot: True when reading the snapshot log (fills
    #     snapshot_pos_to_name), False for the regular log (consumes it).
    # snapshot_pos_to_name: {snapshot position: name} dict shared
    #     between the snapshot reader and the regular reader.
    self.log = open(log_name, "r")
    self.code_map = code_map
    self.is_snapshot = is_snapshot
    self.snapshot_pos_to_name = snapshot_pos_to_name
    # Maps start addresses of snapshot-originated code to their
    # snapshot names; cleared on each moving GC.
    self.address_to_snapshot_name = {}

  def ReadCodeInfo(self):
    """Read the mandatory code-info line and return a CodeInfo."""
    line = self.log.readline() or ""
    match = CodeLogReader._CODE_INFO_RE.match(line)
    assert match, "No code info in log"
    return CodeInfo(arch=match.group(1), header_size=int(match.group(2)))

  def ReadUpToGC(self, code_info):
    """Replay log events until the next moving-GC marker or EOF.

    Returns True if at least one line was consumed, False on immediate
    EOF (used by the snapshot pass to detect the end of the log).
    """
    made_progress = False
    code_header_size = code_info.header_size
    while True:
      line = self.log.readline()
      if not line:
        return made_progress
      made_progress = True

      if line.startswith(CodeLogReader._CODE_MOVING_GC):
        # Addresses become stale across a moving GC; drop the cache.
        self.address_to_snapshot_name.clear()
        return made_progress

      match = CodeLogReader._CODE_CREATE_RE.match(line)
      if match:
        # Logged addresses point at the code object; skip past the
        # header so ranges cover only the instructions.
        start_address = int(match.group(2), 16) + code_header_size
        end_address = start_address + int(match.group(3)) - code_header_size
        if start_address in self.address_to_snapshot_name:
          name = self.address_to_snapshot_name[start_address]
          origin = JS_SNAPSHOT_ORIGIN
        else:
          name = "%s:%s" % (match.group(1), match.group(4))
          origin = JS_ORIGIN
        # NOTE(review): in non-snapshot mode group(5) (the .code file
        # offset) is assumed present; int(None) would raise otherwise.
        if self.is_snapshot:
          origin_offset = 0
        else:
          origin_offset = int(match.group(5))
        code = Code(name, start_address, end_address, origin, origin_offset)
        conficting_code = self.code_map.Find(start_address)
        if conficting_code:
          CodeLogReader._HandleCodeConflict(conficting_code, code)
          # TODO(vitalyr): this warning is too noisy because of our
          # attempts to reconstruct code log from the snapshot.
          # print >>sys.stderr, \
          #     "Warning: Skipping duplicate code log entry %s" % code
          continue
        self.code_map.Add(code)
        continue

      match = CodeLogReader._CODE_MOVE_RE.match(line)
      if match:
        old_start_address = int(match.group(1), 16) + code_header_size
        new_start_address = int(match.group(2), 16) + code_header_size
        if old_start_address == new_start_address:
          # Skip useless code move entries.
          continue
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
            "Inexact move address %x for %s" % (old_start_address, code)
        # Re-register under the new address range, preserving the size.
        self.code_map.Remove(code)
        size = code.end_address - code.start_address
        code.start_address = new_start_address
        code.end_address = new_start_address + size
        self.code_map.Add(code)
        continue

      match = CodeLogReader._CODE_DELETE_RE.match(line)
      if match:
        old_start_address = int(match.group(1), 16) + code_header_size
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
            "Inexact delete address %x for %s" % (old_start_address, code)
        self.code_map.Remove(code)
        continue

      match = CodeLogReader._SNAPSHOT_POS_RE.match(line)
      if match:
        start_address = int(match.group(1), 16) + code_header_size
        snapshot_pos = int(match.group(2))
        if self.is_snapshot:
          # Snapshot pass: remember the name of the code at this position.
          code = self.code_map.Find(start_address)
          if code:
            assert code.start_address == start_address, \
                "Inexact snapshot address %x for %s" % (start_address, code)
            self.snapshot_pos_to_name[snapshot_pos] = code.name
        else:
          # Regular pass: resolve the position back to the snapshot name
          # recorded by the snapshot reader.
          if snapshot_pos in self.snapshot_pos_to_name:
            self.address_to_snapshot_name[start_address] = \
                self.snapshot_pos_to_name[snapshot_pos]

  def Dispose(self):
    """Close the underlying log file."""
    self.log.close()

  @staticmethod
  def _HandleCodeConflict(old_code, new_code):
    """Merge a duplicate code-creation entry into the existing object."""
    assert (old_code.start_address == new_code.start_address and
            old_code.end_address == new_code.end_address), \
        "Conficting code log entries %s and %s" % (old_code, new_code)
    CodeLogReader._UpdateNames(old_code, new_code)

  @staticmethod
  def _UpdateNames(old_code, new_code):
    """Record new_code's name as an alias of old_code when they differ."""
    if old_code.name == new_code.name:
      return
    # Kludge: there are code objects with custom names that don't
    # match their flags.
    misnamed_code = set(["Builtin:CpuFeatures::Probe"])
    if old_code.name in misnamed_code:
      return
    # Code object may be shared by a few functions. Collect the full
    # set of names.
    old_code.AddName(new_code.name)
| 490 |
| 491 |
class Descriptor(object):
  """Descriptor of a structure in the binary trace log."""

  # Maps the textual field formats used in descriptors to ctypes types.
  CTYPE_MAP = {
    "u16": ctypes.c_uint16,
    "u32": ctypes.c_uint32,
    "u64": ctypes.c_uint64
  }

  def __init__(self, fields):
    """Build a ctypes.Structure subclass from (name, format) pairs."""
    class TraceItem(ctypes.Structure):
      _fields_ = Descriptor.CtypesFields(fields)

      def __str__(self):
        pairs = ("%s: %s" % (name, getattr(self, name))
                 for name, _ in TraceItem._fields_)
        return ", ".join(pairs)

    self.ctype = TraceItem

  def Read(self, trace, offset):
    """Overlay the structure onto the trace buffer at offset (no copy)."""
    return self.ctype.from_buffer(trace, offset)

  @staticmethod
  def CtypesFields(fields):
    """Translate (name, format) pairs into ctypes _fields_ entries."""
    return [(name, Descriptor.CTYPE_MAP[fmt]) for (name, fmt) in fields]
| 517 |
| 518 |
| 519 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=
tree;f=tools/perf |
| 520 # for the gory details. |
| 521 |
| 522 |
# Layout of struct perf_file_header at the start of a perf.data file.
TRACE_HEADER_DESC = Descriptor([
  ("magic", "u64"),
  ("size", "u64"),
  ("attr_size", "u64"),
  ("attrs_offset", "u64"),
  ("attrs_size", "u64"),
  ("data_offset", "u64"),
  ("data_size", "u64"),
  ("event_types_offset", "u64"),
  ("event_types_size", "u64")
])


# The part of struct perf_event_attr this script needs; sample_type
# determines which fields appear in each sample event body.
PERF_EVENT_ATTR_DESC = Descriptor([
  ("type", "u32"),
  ("size", "u32"),
  ("config", "u64"),
  ("sample_period_or_freq", "u64"),
  ("sample_type", "u64"),
  ("read_format", "u64"),
  ("flags", "u64"),
  ("wakeup_events_or_watermark", "u32"),
  ("bt_type", "u32"),
  ("bp_addr", "u64"),
  ("bp_len", "u64"),
])


# struct perf_event_header preceding every event record in the data
# section; size covers the header plus the event body.
PERF_EVENT_HEADER_DESC = Descriptor([
  ("type", "u32"),
  ("misc", "u16"),
  ("size", "u16")
])


# Fixed-size body of a PERF_RECORD_MMAP event; the null-padded filename
# follows it (see TraceReader.ReadMmap).
PERF_MMAP_EVENT_BODY_DESC = Descriptor([
  ("pid", "u32"),
  ("tid", "u32"),
  ("addr", "u64"),
  ("len", "u64"),
  ("pgoff", "u64")
])


# perf_event_attr.sample_type bits control the set of
# perf_sample_event fields.
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10


# All possible PERF_RECORD_SAMPLE body fields, in file order, together
# with the sample_type bit that enables each of them.
PERF_SAMPLE_EVENT_BODY_FIELDS = [
  ("ip", "u64", PERF_SAMPLE_IP),
  ("pid", "u32", PERF_SAMPLE_TID),
  ("tid", "u32", PERF_SAMPLE_TID),
  ("time", "u64", PERF_SAMPLE_TIME),
  ("addr", "u64", PERF_SAMPLE_ADDR),
  ("id", "u64", PERF_SAMPLE_ID),
  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
  ("cpu", "u32", PERF_SAMPLE_CPU),
  ("res", "u32", PERF_SAMPLE_CPU),
  ("period", "u64", PERF_SAMPLE_PERIOD),
  # Don't want to handle read format that comes after the period and
  # before the callchain and has variable size.
  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
  # Raw data follows the callchain and is ignored.
]


# Format of each instruction pointer in a sample's callchain.
PERF_SAMPLE_EVENT_IP_FORMAT = "u64"


# Event record types (enum perf_event_type) this script handles.
PERF_RECORD_MMAP = 1
PERF_RECORD_SAMPLE = 9
| 605 |
| 606 |
class TraceReader(object):
  """Perf (linux-2.6/tools/perf) trace file reader."""

  # First 8 bytes of a perf.data file — presumably the ASCII string
  # "PERFFILE" read as a little-endian u64; TODO confirm.
  _TRACE_HEADER_MAGIC = 4993446653023372624

  def __init__(self, trace_name):
    # mmap the whole trace, check the header and locate the event data.
    self.trace_file = open(trace_name, "r")
    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
      print >>sys.stderr, "Warning: unsupported trace header magic"
    # Current read position and end of the event data section.
    self.offset = self.trace_header.data_offset
    self.limit = self.trace_header.data_offset + self.trace_header.data_size
    assert self.limit <= self.trace.size(), \
        "Trace data limit exceeds trace file size"
    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
    assert self.trace_header.attrs_size != 0, \
        "No perf event attributes found in the trace"
    # Only the first attribute is read; its sample_type is assumed to
    # apply to every sample in the trace.
    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
                                                self.trace_header.attrs_offset)
    self.sample_event_body_desc = self._SampleEventBodyDesc(
        perf_event_attr.sample_type)
    self.callchain_supported = \
        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
    if self.callchain_supported:
      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
      self.ip_size = ctypes.sizeof(self.ip_struct)

  def ReadEventHeader(self):
    """Return (header, offset) of the next event, or (None, 0) at the end."""
    if self.offset >= self.limit:
      return None, 0
    offset = self.offset
    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
    self.offset += header.size
    return header, offset

  def ReadMmap(self, header, offset):
    """Read the body of a PERF_RECORD_MMAP event at the given offset."""
    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
                                               offset + self.header_size)
    # Read null-padded filename.
    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
                          offset + header.size].rstrip(chr(0))
    mmap_info.filename = filename
    return mmap_info

  def ReadSample(self, header, offset):
    """Read a PERF_RECORD_SAMPLE body, including its callchain ips."""
    sample = self.sample_event_body_desc.Read(self.trace,
                                              offset + self.header_size)
    if not self.callchain_supported:
      return sample
    # sample.nr instruction pointers follow the fixed-size body.
    sample.ips = []
    offset += self.header_size + ctypes.sizeof(sample)
    for _ in xrange(sample.nr):
      sample.ips.append(
        self.ip_struct.from_buffer(self.trace, offset).value)
      offset += self.ip_size
    return sample

  def Dispose(self):
    """Unmap the trace and close the underlying file."""
    self.trace.close()
    self.trace_file.close()

  def _SampleEventBodyDesc(self, sample_type):
    """Build a Descriptor holding only the fields sample_type enables."""
    assert (sample_type & PERF_SAMPLE_READ) == 0, \
        "Can't hande read format in samples"
    fields = [(field, format)
              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
              if (bit & sample_type) != 0]
    return Descriptor(fields)
| 676 |
| 677 |
# First line of a section header in "objdump -h" output: captures the
# section name after the index.
OBJDUMP_SECTION_HEADER_RE = re.compile(
  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
# A symbol table line in "objdump -t/-T" output: captures address,
# flags, section, size and the (possibly .hidden-prefixed) name.
OBJDUMP_SYMBOL_LINE_RE = re.compile(
  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
# Marks the start of the dynamic symbol table in objdump output.
OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
  r"^DYNAMIC SYMBOL TABLE")
KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
# Matches the synthetic kallsyms mmap record perf emits for the kernel.
PERF_KERNEL_ALLSYMS_RE = re.compile(
  r".*kallsyms.*")
# A kallsyms text symbol line: address, (t|T), name.
KERNEL_ALLSYMS_LINE_RE = re.compile(
  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
| 689 |
| 690 |
class LibraryRepo(object):
  """Repository of mmapped libraries: loads their symbols, counts ticks."""

  def __init__(self):
    # Mmap infos of loaded libraries, kept in move-to-front order.
    self.infos = []
    # Unique names handed out so far (see _UniqueMmapName).
    self.names = set()
    # NOTE(review): appears unused — per-library ticks live on the
    # mmap_info objects instead; confirm before removing.
    self.ticks = {}

  def Load(self, mmap_info, code_map, options):
    """Load symbols for a mmapped region into code_map via objdump.

    Returns True when the mmap is skipped (kernel mmap without --kernel,
    missing file) and the result of _LoadKernelSymbols for the kallsyms
    mmap; falls through (implicitly returning None) after a regular
    library is processed.  Callers ignore the return value.
    """
    # Skip kernel mmaps when requested using the fact that their tid
    # is 0.
    if mmap_info.tid == 0 and not options.kernel:
      return True
    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
      return self._LoadKernelSymbols(code_map)
    self.infos.append(mmap_info)
    mmap_info.ticks = 0
    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
    if not os.path.exists(mmap_info.filename):
      return True
    # Request section headers (-h), symbols (-t), and dynamic symbols
    # (-T) from objdump.
    # Unfortunately, section headers span two lines, so we have to
    # keep the just seen section name (from the first line in each
    # section header) in the after_section variable.
    process = subprocess.Popen(
      "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    pipe = process.stdout
    after_section = None
    code_sections = set()
    reloc_sections = set()
    dynamic = False
    try:
      for line in pipe:
        if after_section:
          # Second line of a section header: the flags tell us whether
          # the section holds code and whether it is relocated.
          if line.find("CODE") != -1:
            code_sections.add(after_section)
          if line.find("RELOC") != -1:
            reloc_sections.add(after_section)
          after_section = None
          continue

        match = OBJDUMP_SECTION_HEADER_RE.match(line)
        if match:
          after_section = match.group(1)
          continue

        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
          # Symbols after this marker come from the dynamic symbol table.
          dynamic = True
          continue

        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
        if match:
          start_address = int(match.group(1), 16)
          origin_offset = start_address
          flags = match.group(2)
          section = match.group(3)
          if section in code_sections:
            # Dynamic symbols and symbols in relocated sections are
            # file-relative; rebase them to the mmap load address.
            if dynamic or section in reloc_sections:
              start_address += mmap_info.addr
            size = int(match.group(4), 16)
            name = match.group(5)
            origin = mmap_info.filename
            code_map.Add(Code(name, start_address, start_address + size,
                              origin, origin_offset))
    finally:
      pipe.close()
      assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename

  def Tick(self, pc):
    """Count a tick in the library containing pc; return True if found."""
    for i, mmap_info in enumerate(self.infos):
      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
        mmap_info.ticks += 1
        # Move-to-front so hot libraries are found quickly next time.
        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
        return True
    return False

  def _UniqueMmapName(self, mmap_info):
    """Return the mmap's filename, suffixed with -N if already taken."""
    name = mmap_info.filename
    index = 1
    while name in self.names:
      name = "%s-%d" % (mmap_info.filename, index)
      index += 1
    self.names.add(name)
    return name

  def _LoadKernelSymbols(self, code_map):
    """Load kernel text symbols from /proc/kallsyms into code_map.

    A symbol's end address is only known when the next symbol is seen.
    NOTE(review): as written, the final symbol in the file is never
    added to code_map (its end address stays unknown) — confirm whether
    that is intended.
    Returns False when /proc/kallsyms is unavailable, True otherwise.
    """
    if not os.path.exists(KERNEL_ALLSYMS_FILE):
      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
      return False
    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
    code = None
    for line in kallsyms:
      match = KERNEL_ALLSYMS_LINE_RE.match(line)
      if match:
        start_address = int(match.group(1), 16)
        end_address = start_address
        name = match.group(2)
        if code:
          code.end_address = start_address
          # Limit each kernel symbol to 16 pages (CodeMap.Add max_pages).
          code_map.Add(code, 16)
        code = Code(name, start_address, end_address, "kernel", 0)
    return True
| 793 |
| 794 |
| 795 def PrintReport(code_map, library_repo, code_info, options): |
| 796 print "Ticks per symbol:" |
| 797 used_code = [code for code in code_map.UsedCode()] |
| 798 used_code.sort(key=lambda x: x.self_ticks, reverse=True) |
| 799 for i, code in enumerate(used_code): |
| 800 print "%10d %s [%s]" % (code.self_ticks, code.FullName(), code.origin) |
| 801 if options.disasm_all or i < options.disasm_top: |
| 802 code.PrintAnnotated(code_info, options) |
| 803 print |
| 804 print "Ticks per library:" |
| 805 mmap_infos = [m for m in library_repo.infos] |
| 806 mmap_infos.sort(key=lambda m: m.ticks, reverse=True) |
| 807 for mmap_info in mmap_infos: |
| 808 print "%10d %s" % (mmap_info.ticks, mmap_info.unique_name) |
| 809 |
| 810 |
| 811 def PrintDot(code_map, options): |
| 812 print "digraph G {" |
| 813 for code in code_map.UsedCode(): |
| 814 if code.self_ticks < 10: |
| 815 continue |
| 816 print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name) |
| 817 if code.callee_ticks: |
| 818 for callee, ticks in code.callee_ticks.iteritems(): |
| 819 print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks) |
| 820 print "}" |
| 821 |
| 822 |
| 823 if __name__ == "__main__": |
| 824 parser = optparse.OptionParser(USAGE) |
| 825 parser.add_option("--snapshot-log", |
| 826 default="obj/release/snapshot.log", |
| 827 help="V8 snapshot log file name [default: %default]") |
| 828 parser.add_option("--log", |
| 829 default="v8.log", |
| 830 help="V8 log file name [default: %default]") |
| 831 parser.add_option("--snapshot", |
| 832 default=False, |
| 833 action="store_true", |
| 834 help="process V8 snapshot log [default: %default]") |
| 835 parser.add_option("--trace", |
| 836 default="perf.data", |
| 837 help="perf trace file name [default: %default]") |
| 838 parser.add_option("--kernel", |
| 839 default=False, |
| 840 action="store_true", |
| 841 help="process kernel entries [default: %default]") |
| 842 parser.add_option("--disasm-top", |
| 843 default=0, |
| 844 type="int", |
| 845 help=("number of top symbols to disassemble and annotate " |
| 846 "[default: %default]")) |
| 847 parser.add_option("--disasm-all", |
| 848 default=False, |
| 849 action="store_true", |
| 850 help=("disassemble and annotate all used symbols " |
| 851 "[default: %default]")) |
| 852 parser.add_option("--dot", |
| 853 default=False, |
| 854 action="store_true", |
| 855 help="produce dot output (WIP) [default: %default]") |
| 856 parser.add_option("--quiet", "-q", |
| 857 default=False, |
| 858 action="store_true", |
| 859 help="no auxiliary messages [default: %default]") |
| 860 options, args = parser.parse_args() |
| 861 |
| 862 if not options.quiet: |
| 863 if options.snapshot: |
| 864 print "V8 logs: %s, %s, %s.code" % (options.snapshot_log, |
| 865 options.log, |
| 866 options.log) |
| 867 else: |
| 868 print "V8 log: %s, %s.code (no snapshot)" % (options.log, options.log) |
| 869 print "Perf trace file: %s" % options.trace |
| 870 |
| 871 # Stats. |
| 872 events = 0 |
| 873 ticks = 0 |
| 874 missed_ticks = 0 |
| 875 really_missed_ticks = 0 |
| 876 mmap_time = 0 |
| 877 sample_time = 0 |
| 878 |
| 879 # Initialize the log reader and get the code info. |
| 880 code_map = CodeMap() |
| 881 snapshot_name_map = {} |
| 882 log_reader = CodeLogReader(log_name=options.log, |
| 883 code_map=code_map, |
| 884 is_snapshot=False, |
| 885 snapshot_pos_to_name=snapshot_name_map) |
| 886 code_info = log_reader.ReadCodeInfo() |
| 887 if not options.quiet: |
| 888 print "Generated code architecture: %s" % code_info.arch |
| 889 print |
| 890 |
| 891 # Process the snapshot log to fill the snapshot name map. |
| 892 if options.snapshot: |
| 893 snapshot_log_reader = CodeLogReader(log_name=options.snapshot_log, |
| 894 code_map=CodeMap(), |
| 895 is_snapshot=True, |
| 896 snapshot_pos_to_name=snapshot_name_map) |
| 897 while snapshot_log_reader.ReadUpToGC(code_info): |
| 898 pass |
| 899 |
| 900 # Process the code and trace logs. |
| 901 library_repo = LibraryRepo() |
| 902 log_reader.ReadUpToGC(code_info) |
| 903 trace_reader = TraceReader(options.trace) |
| 904 while True: |
| 905 header, offset = trace_reader.ReadEventHeader() |
| 906 if not header: |
| 907 break |
| 908 events += 1 |
| 909 if header.type == PERF_RECORD_MMAP: |
| 910 start = time.time() |
| 911 mmap_info = trace_reader.ReadMmap(header, offset) |
| 912 if mmap_info.filename == V8_GC_FAKE_MMAP: |
| 913 log_reader.ReadUpToGC() |
| 914 else: |
| 915 library_repo.Load(mmap_info, code_map, options) |
| 916 mmap_time += time.time() - start |
| 917 elif header.type == PERF_RECORD_SAMPLE: |
| 918 ticks += 1 |
| 919 start = time.time() |
| 920 sample = trace_reader.ReadSample(header, offset) |
| 921 code = code_map.Find(sample.ip) |
| 922 if code: |
| 923 code.Tick(sample.ip) |
| 924 else: |
| 925 missed_ticks += 1 |
| 926 if not library_repo.Tick(sample.ip) and not code: |
| 927 really_missed_ticks += 1 |
| 928 if trace_reader.callchain_supported: |
| 929 for ip in sample.ips: |
| 930 caller_code = code_map.Find(ip) |
| 931 if caller_code: |
| 932 if code: |
| 933 caller_code.CalleeTick(code) |
| 934 code = caller_code |
| 935 sample_time += time.time() - start |
| 936 |
| 937 if options.dot: |
| 938 PrintDot(code_map, options) |
| 939 else: |
| 940 PrintReport(code_map, library_repo, code_info, options) |
| 941 |
| 942 if not options.quiet: |
| 943 print |
| 944 print "Stats:" |
| 945 print "%10d total trace events" % events |
| 946 print "%10d total ticks" % ticks |
| 947 print "%10d ticks not in symbols" % missed_ticks |
| 948 print "%10d unaccounted ticks" % really_missed_ticks |
| 949 print "%10d total symbols" % len([c for c in code_map.AllCode()]) |
| 950 print "%10d used symbols" % len([c for c in code_map.UsedCode()]) |
| 951 print "%9.2fs library processing time" % mmap_time |
| 952 print "%9.2fs tick processing time" % sample_time |
| 953 |
| 954 log_reader.Dispose() |
| 955 trace_reader.Dispose() |
OLD | NEW |