Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: tools/ll_prof.py

Issue 11444031: llprof improvements (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/run-llprof.sh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # 2 #
3 # Copyright 2012 the V8 project authors. All rights reserved. 3 # Copyright 2012 the V8 project authors. All rights reserved.
4 # Redistribution and use in source and binary forms, with or without 4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are 5 # modification, are permitted provided that the following conditions are
6 # met: 6 # met:
7 # 7 #
8 # * Redistributions of source code must retain the above copyright 8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer. 9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above 10 # * Redistributions in binary form must reproduce the above
(...skipping 27 matching lines...) Expand all
38 import subprocess 38 import subprocess
39 import sys 39 import sys
40 import time 40 import time
41 41
42 42
43 USAGE="""usage: %prog [OPTION]... 43 USAGE="""usage: %prog [OPTION]...
44 44
45 Analyses V8 and perf logs to produce profiles. 45 Analyses V8 and perf logs to produce profiles.
46 46
47 Perf logs can be collected using a command like: 47 Perf logs can be collected using a command like:
48 $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof 48 $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
49 # -R: collect all data 49 # -R: collect all data
50 # -e cycles: use cpu-cycles event (run "perf list" for details) 50 # -e cycles: use cpu-cycles event (run "perf list" for details)
51 # -c 10000: write a sample after each 10000 events 51 # -c 10000: write a sample after each 10000 events
52 # -f: force output file overwrite 52 # -f: force output file overwrite
53 # -i: limit profiling to our process and the kernel 53 # -i: limit profiling to our process and the kernel
54 # --ll-prof shell flag enables the right V8 logs 54 # --ll-prof shell flag enables the right V8 logs
55 This will produce a binary trace file (perf.data) that %prog can analyse. 55 This will produce a binary trace file (perf.data) that %prog can analyse.
56 56
57 IMPORTANT:
58 The kernel has an internal maximum for events per second, it is 100K by
59 default. That's not enough for "-c 10000". Set it to some higher value:
60 $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
61 You can also make the warning about kernel address maps go away:
62 $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
63
64 We have a convenience script that handles all of the above for you:
65 $ tools/run-llprof.sh ./d8 bench.js
66
57 Examples: 67 Examples:
58 # Print flat profile with annotated disassembly for the 10 top 68 # Print flat profile with annotated disassembly for the 10 top
59 # symbols. Use default log names and include the snapshot log. 69 # symbols. Use default log names and include the snapshot log.
60 $ %prog --snapshot --disasm-top=10 70 $ %prog --snapshot --disasm-top=10
61 71
62 # Print flat profile with annotated disassembly for all used symbols. 72 # Print flat profile with annotated disassembly for all used symbols.
63 # Use default log names and include kernel symbols into analysis. 73 # Use default log names and include kernel symbols into analysis.
64 $ %prog --disasm-all --kernel 74 $ %prog --disasm-all --kernel
65 75
66 # Print flat profile. Use custom log names. 76 # Print flat profile. Use custom log names.
67 $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot 77 $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
68 """ 78 """
69 79
70 80
71 JS_ORIGIN = "js" 81 JS_ORIGIN = "js"
72 JS_SNAPSHOT_ORIGIN = "js-snapshot" 82 JS_SNAPSHOT_ORIGIN = "js-snapshot"
73 83
74 class Code(object): 84 class Code(object):
75 """Code object.""" 85 """Code object."""
76 86
77 _id = 0 87 _id = 0
88 UNKNOWN = 0
89 V8INTERNAL = 1
90 FULL_CODEGEN = 2
91 OPTIMIZED = 3
78 92
79 def __init__(self, name, start_address, end_address, origin, origin_offset): 93 def __init__(self, name, start_address, end_address, origin, origin_offset):
80 self.id = Code._id 94 self.id = Code._id
81 Code._id += 1 95 Code._id += 1
82 self.name = name 96 self.name = name
83 self.other_names = None 97 self.other_names = None
84 self.start_address = start_address 98 self.start_address = start_address
85 self.end_address = end_address 99 self.end_address = end_address
86 self.origin = origin 100 self.origin = origin
87 self.origin_offset = origin_offset 101 self.origin_offset = origin_offset
88 self.self_ticks = 0 102 self.self_ticks = 0
89 self.self_ticks_map = None 103 self.self_ticks_map = None
90 self.callee_ticks = None 104 self.callee_ticks = None
105 if name.startswith("LazyCompile:*"):
106 self.codetype = Code.OPTIMIZED
107 elif name.startswith("LazyCompile:"):
108 self.codetype = Code.FULL_CODEGEN
109 elif name.startswith("v8::internal::"):
110 self.codetype = Code.V8INTERNAL
111 else:
112 self.codetype = Code.UNKNOWN
91 113
92 def AddName(self, name): 114 def AddName(self, name):
93 assert self.name != name 115 assert self.name != name
94 if self.other_names is None: 116 if self.other_names is None:
95 self.other_names = [name] 117 self.other_names = [name]
96 return 118 return
97 if not name in self.other_names: 119 if not name in self.other_names:
98 self.other_names.append(name) 120 self.other_names.append(name)
99 121
100 def FullName(self): 122 def FullName(self):
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 return disasm.GetDisasmLines(filename, 200 return disasm.GetDisasmLines(filename,
179 self.origin_offset, 201 self.origin_offset,
180 self.end_address - self.start_address, 202 self.end_address - self.start_address,
181 arch, 203 arch,
182 inplace) 204 inplace)
183 205
184 206
185 class CodePage(object): 207 class CodePage(object):
186 """Group of adjacent code objects.""" 208 """Group of adjacent code objects."""
187 209
188 SHIFT = 12 # 4K pages 210 SHIFT = 20 # 1M pages
189 SIZE = (1 << SHIFT) 211 SIZE = (1 << SHIFT)
190 MASK = ~(SIZE - 1) 212 MASK = ~(SIZE - 1)
191 213
192 @staticmethod 214 @staticmethod
193 def PageAddress(address): 215 def PageAddress(address):
194 return address & CodePage.MASK 216 return address & CodePage.MASK
195 217
196 @staticmethod 218 @staticmethod
197 def PageId(address): 219 def PageId(address):
198 return address >> CodePage.SHIFT 220 return address >> CodePage.SHIFT
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after
500 522
501 @staticmethod 523 @staticmethod
502 def CtypesFields(fields): 524 def CtypesFields(fields):
503 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields] 525 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
504 526
505 527
506 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a= tree;f=tools/perf 528 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a= tree;f=tools/perf
507 # for the gory details. 529 # for the gory details.
508 530
509 531
532 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
510 TRACE_HEADER_DESC = Descriptor([ 533 TRACE_HEADER_DESC = Descriptor([
511 ("magic", "u64"), 534 ("magic", "u64"),
512 ("size", "u64"), 535 ("size", "u64"),
513 ("attr_size", "u64"), 536 ("attr_size", "u64"),
514 ("attrs_offset", "u64"), 537 ("attrs_offset", "u64"),
515 ("attrs_size", "u64"), 538 ("attrs_size", "u64"),
516 ("data_offset", "u64"), 539 ("data_offset", "u64"),
517 ("data_size", "u64"), 540 ("data_size", "u64"),
518 ("event_types_offset", "u64"), 541 ("event_types_offset", "u64"),
519 ("event_types_size", "u64") 542 ("event_types_size", "u64")
520 ]) 543 ])
521 544
522 545
546 # Reference: /usr/include/linux/perf_event.h
523 PERF_EVENT_ATTR_DESC = Descriptor([ 547 PERF_EVENT_ATTR_DESC = Descriptor([
524 ("type", "u32"), 548 ("type", "u32"),
525 ("size", "u32"), 549 ("size", "u32"),
526 ("config", "u64"), 550 ("config", "u64"),
527 ("sample_period_or_freq", "u64"), 551 ("sample_period_or_freq", "u64"),
528 ("sample_type", "u64"), 552 ("sample_type", "u64"),
529 ("read_format", "u64"), 553 ("read_format", "u64"),
530 ("flags", "u64"), 554 ("flags", "u64"),
531 ("wakeup_events_or_watermark", "u32"), 555 ("wakeup_events_or_watermark", "u32"),
532 ("bt_type", "u32"), 556 ("bp_type", "u32"),
533 ("bp_addr", "u64"), 557 ("bp_addr", "u64"),
534 ("bp_len", "u64"), 558 ("bp_len", "u64")
535 ]) 559 ])
536 560
537 561
562 # Reference: /usr/include/linux/perf_event.h
538 PERF_EVENT_HEADER_DESC = Descriptor([ 563 PERF_EVENT_HEADER_DESC = Descriptor([
539 ("type", "u32"), 564 ("type", "u32"),
540 ("misc", "u16"), 565 ("misc", "u16"),
541 ("size", "u16") 566 ("size", "u16")
542 ]) 567 ])
543 568
544 569
570 # Reference: kernel/events/core.c
545 PERF_MMAP_EVENT_BODY_DESC = Descriptor([ 571 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
546 ("pid", "u32"), 572 ("pid", "u32"),
547 ("tid", "u32"), 573 ("tid", "u32"),
548 ("addr", "u64"), 574 ("addr", "u64"),
549 ("len", "u64"), 575 ("len", "u64"),
550 ("pgoff", "u64") 576 ("pgoff", "u64")
551 ]) 577 ])
552 578
553 579
554 # perf_event_attr.sample_type bits control the set of 580 # perf_event_attr.sample_type bits control the set of
555 # perf_sample_event fields. 581 # perf_sample_event fields.
556 PERF_SAMPLE_IP = 1 << 0 582 PERF_SAMPLE_IP = 1 << 0
557 PERF_SAMPLE_TID = 1 << 1 583 PERF_SAMPLE_TID = 1 << 1
558 PERF_SAMPLE_TIME = 1 << 2 584 PERF_SAMPLE_TIME = 1 << 2
559 PERF_SAMPLE_ADDR = 1 << 3 585 PERF_SAMPLE_ADDR = 1 << 3
560 PERF_SAMPLE_READ = 1 << 4 586 PERF_SAMPLE_READ = 1 << 4
561 PERF_SAMPLE_CALLCHAIN = 1 << 5 587 PERF_SAMPLE_CALLCHAIN = 1 << 5
562 PERF_SAMPLE_ID = 1 << 6 588 PERF_SAMPLE_ID = 1 << 6
563 PERF_SAMPLE_CPU = 1 << 7 589 PERF_SAMPLE_CPU = 1 << 7
564 PERF_SAMPLE_PERIOD = 1 << 8 590 PERF_SAMPLE_PERIOD = 1 << 8
565 PERF_SAMPLE_STREAM_ID = 1 << 9 591 PERF_SAMPLE_STREAM_ID = 1 << 9
566 PERF_SAMPLE_RAW = 1 << 10 592 PERF_SAMPLE_RAW = 1 << 10
567 593
568 594
595 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
569 PERF_SAMPLE_EVENT_BODY_FIELDS = [ 596 PERF_SAMPLE_EVENT_BODY_FIELDS = [
570 ("ip", "u64", PERF_SAMPLE_IP), 597 ("ip", "u64", PERF_SAMPLE_IP),
571 ("pid", "u32", PERF_SAMPLE_TID), 598 ("pid", "u32", PERF_SAMPLE_TID),
572 ("tid", "u32", PERF_SAMPLE_TID), 599 ("tid", "u32", PERF_SAMPLE_TID),
573 ("time", "u64", PERF_SAMPLE_TIME), 600 ("time", "u64", PERF_SAMPLE_TIME),
574 ("addr", "u64", PERF_SAMPLE_ADDR), 601 ("addr", "u64", PERF_SAMPLE_ADDR),
575 ("id", "u64", PERF_SAMPLE_ID), 602 ("id", "u64", PERF_SAMPLE_ID),
576 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID), 603 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
577 ("cpu", "u32", PERF_SAMPLE_CPU), 604 ("cpu", "u32", PERF_SAMPLE_CPU),
578 ("res", "u32", PERF_SAMPLE_CPU), 605 ("res", "u32", PERF_SAMPLE_CPU),
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
695 self.infos.append(mmap_info) 722 self.infos.append(mmap_info)
696 mmap_info.ticks = 0 723 mmap_info.ticks = 0
697 mmap_info.unique_name = self._UniqueMmapName(mmap_info) 724 mmap_info.unique_name = self._UniqueMmapName(mmap_info)
698 if not os.path.exists(mmap_info.filename): 725 if not os.path.exists(mmap_info.filename):
699 return True 726 return True
700 # Request section headers (-h), symbols (-t), and dynamic symbols 727 # Request section headers (-h), symbols (-t), and dynamic symbols
701 # (-T) from objdump. 728 # (-T) from objdump.
702 # Unfortunately, section headers span two lines, so we have to 729 # Unfortunately, section headers span two lines, so we have to
703 # keep the just seen section name (from the first line in each 730 # keep the just seen section name (from the first line in each
704 # section header) in the after_section variable. 731 # section header) in the after_section variable.
732 if mmap_info.filename.endswith(".ko"):
733 dynamic_symbols = ""
734 else:
735 dynamic_symbols = "-T"
705 process = subprocess.Popen( 736 process = subprocess.Popen(
706 "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename), 737 "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
707 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 738 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
708 pipe = process.stdout 739 pipe = process.stdout
709 after_section = None 740 after_section = None
710 code_sections = set() 741 code_sections = set()
711 reloc_sections = set() 742 reloc_sections = set()
712 dynamic = False 743 dynamic = False
713 try: 744 try:
714 for line in pipe: 745 for line in pipe:
715 if after_section: 746 if after_section:
716 if line.find("CODE") != -1: 747 if line.find("CODE") != -1:
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
788 used_code = [code for code in code_map.UsedCode()] 819 used_code = [code for code in code_map.UsedCode()]
789 used_code.sort(key=lambda x: x.self_ticks, reverse=True) 820 used_code.sort(key=lambda x: x.self_ticks, reverse=True)
790 for i, code in enumerate(used_code): 821 for i, code in enumerate(used_code):
791 code_ticks = code.self_ticks 822 code_ticks = code.self_ticks
792 print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks, 823 print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
793 code.FullName(), code.origin) 824 code.FullName(), code.origin)
794 if options.disasm_all or i < options.disasm_top: 825 if options.disasm_all or i < options.disasm_top:
795 code.PrintAnnotated(arch, options) 826 code.PrintAnnotated(arch, options)
796 print 827 print
797 print "Ticks per library:" 828 print "Ticks per library:"
798 mmap_infos = [m for m in library_repo.infos] 829 mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
799 mmap_infos.sort(key=lambda m: m.ticks, reverse=True) 830 mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
800 for mmap_info in mmap_infos: 831 for mmap_info in mmap_infos:
801 mmap_ticks = mmap_info.ticks 832 mmap_ticks = mmap_info.ticks
802 print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks, 833 print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
803 mmap_info.unique_name) 834 mmap_info.unique_name)
804 835
805 836
806 def PrintDot(code_map, options): 837 def PrintDot(code_map, options):
807 print "digraph G {" 838 print "digraph G {"
808 for code in code_map.UsedCode(): 839 for code in code_map.UsedCode():
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
878 disasm.OBJDUMP_BIN = options.objdump 909 disasm.OBJDUMP_BIN = options.objdump
879 OBJDUMP_BIN = options.objdump 910 OBJDUMP_BIN = options.objdump
880 else: 911 else:
881 print "Cannot find %s, falling back to default objdump" % options.objdump 912 print "Cannot find %s, falling back to default objdump" % options.objdump
882 913
883 # Stats. 914 # Stats.
884 events = 0 915 events = 0
885 ticks = 0 916 ticks = 0
886 missed_ticks = 0 917 missed_ticks = 0
887 really_missed_ticks = 0 918 really_missed_ticks = 0
919 optimized_ticks = 0
920 generated_ticks = 0
921 v8_internal_ticks = 0
888 mmap_time = 0 922 mmap_time = 0
889 sample_time = 0 923 sample_time = 0
890 924
891 # Process the snapshot log to fill the snapshot name map. 925 # Process the snapshot log to fill the snapshot name map.
892 snapshot_name_map = {} 926 snapshot_name_map = {}
893 if options.snapshot: 927 if options.snapshot:
894 snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log) 928 snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
895 snapshot_name_map = snapshot_log_reader.ReadNameMap() 929 snapshot_name_map = snapshot_log_reader.ReadNameMap()
896 930
897 # Initialize the log reader. 931 # Initialize the log reader.
(...skipping 23 matching lines...) Expand all
921 else: 955 else:
922 library_repo.Load(mmap_info, code_map, options) 956 library_repo.Load(mmap_info, code_map, options)
923 mmap_time += time.time() - start 957 mmap_time += time.time() - start
924 elif header.type == PERF_RECORD_SAMPLE: 958 elif header.type == PERF_RECORD_SAMPLE:
925 ticks += 1 959 ticks += 1
926 start = time.time() 960 start = time.time()
927 sample = trace_reader.ReadSample(header, offset) 961 sample = trace_reader.ReadSample(header, offset)
928 code = code_map.Find(sample.ip) 962 code = code_map.Find(sample.ip)
929 if code: 963 if code:
930 code.Tick(sample.ip) 964 code.Tick(sample.ip)
965 if code.codetype == Code.OPTIMIZED:
966 optimized_ticks += 1
967 elif code.codetype == Code.FULL_CODEGEN:
968 generated_ticks += 1
969 elif code.codetype == Code.V8INTERNAL:
970 v8_internal_ticks += 1
931 else: 971 else:
932 missed_ticks += 1 972 missed_ticks += 1
933 if not library_repo.Tick(sample.ip) and not code: 973 if not library_repo.Tick(sample.ip) and not code:
934 really_missed_ticks += 1 974 really_missed_ticks += 1
935 if trace_reader.callchain_supported: 975 if trace_reader.callchain_supported:
936 for ip in sample.ips: 976 for ip in sample.ips:
937 caller_code = code_map.Find(ip) 977 caller_code = code_map.Find(ip)
938 if caller_code: 978 if caller_code:
939 if code: 979 if code:
940 caller_code.CalleeTick(code) 980 caller_code.CalleeTick(code)
941 code = caller_code 981 code = caller_code
942 sample_time += time.time() - start 982 sample_time += time.time() - start
943 983
944 if options.dot: 984 if options.dot:
945 PrintDot(code_map, options) 985 PrintDot(code_map, options)
946 else: 986 else:
947 PrintReport(code_map, library_repo, log_reader.arch, ticks, options) 987 PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
948 988
949 if not options.quiet: 989 if not options.quiet:
990 def PrintTicks(number, total, description):
991 print("%10d %5.1f%% ticks in %s" %
992 (number, 100.0*number/total, description))
950 print 993 print
951 print "Stats:" 994 print "Stats:"
952 print "%10d total trace events" % events 995 print "%10d total trace events" % events
953 print "%10d total ticks" % ticks 996 print "%10d total ticks" % ticks
954 print "%10d ticks not in symbols" % missed_ticks 997 print "%10d ticks not in symbols" % missed_ticks
955 print "%10d unaccounted ticks" % really_missed_ticks 998 unaccounted = "unaccounted ticks"
999 if really_missed_ticks > 0:
1000 unaccounted += " (probably in the kernel, try --kernel)"
1001 PrintTicks(really_missed_ticks, ticks, unaccounted)
1002 PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
1003 PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
1004 PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
956 print "%10d total symbols" % len([c for c in code_map.AllCode()]) 1005 print "%10d total symbols" % len([c for c in code_map.AllCode()])
957 print "%10d used symbols" % len([c for c in code_map.UsedCode()]) 1006 print "%10d used symbols" % len([c for c in code_map.UsedCode()])
958 print "%9.2fs library processing time" % mmap_time 1007 print "%9.2fs library processing time" % mmap_time
959 print "%9.2fs tick processing time" % sample_time 1008 print "%9.2fs tick processing time" % sample_time
960 1009
961 log_reader.Dispose() 1010 log_reader.Dispose()
962 trace_reader.Dispose() 1011 trace_reader.Dispose()
OLDNEW
« no previous file with comments | « no previous file | tools/run-llprof.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698