Index: tools/ll_prof.py
|
diff --git a/tools/ll_prof.py b/tools/ll_prof.py
|
index 3afe179d2f8adbc2935135c4efa9cb2f899c92a4..216929d1e20171452e73082976778f8db4c603c6 100755
|
--- a/tools/ll_prof.py
|
+++ b/tools/ll_prof.py
|
@@ -45,7 +45,7 @@ USAGE="""usage: %prog [OPTION]...
|
Analyses V8 and perf logs to produce profiles.
|
|
Perf logs can be collected using a command like:
|
- $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof
|
+ $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
|
# -R: collect all data
|
# -e cycles: use cpu-cycles event (run "perf list" for details)
|
# -c 10000: write a sample after each 10000 events
|
@@ -54,6 +54,16 @@ Perf logs can be collected using a command like:
|
# --ll-prof shell flag enables the right V8 logs
|
This will produce a binary trace file (perf.data) that %prog can analyse.
|
|
+IMPORTANT:
|
+ The kernel has an internal maximum for events per second, it is 100K by
|
+ default. That's not enough for "-c 10000". Set it to some higher value:
|
+ $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
|
+ You can also make the warning about kernel address maps go away:
|
+ $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
|
+
|
+We have a convenience script that handles all of the above for you:
|
+ $ tools/run-llprof.sh ./d8 bench.js
|
+
|
Examples:
|
# Print flat profile with annotated disassembly for the 10 top
|
# symbols. Use default log names and include the snapshot log.
|
@@ -75,6 +85,10 @@ class Code(object):
|
"""Code object."""
|
|
_id = 0
|
+ UNKNOWN = 0
|
+ V8INTERNAL = 1
|
+ FULL_CODEGEN = 2
|
+ OPTIMIZED = 3
|
|
def __init__(self, name, start_address, end_address, origin, origin_offset):
|
self.id = Code._id
|
@@ -88,6 +102,14 @@ class Code(object):
|
self.self_ticks = 0
|
self.self_ticks_map = None
|
self.callee_ticks = None
|
+ if name.startswith("LazyCompile:*"):
|
+ self.codetype = Code.OPTIMIZED
|
+ elif name.startswith("LazyCompile:"):
|
+ self.codetype = Code.FULL_CODEGEN
|
+ elif name.startswith("v8::internal::"):
|
+ self.codetype = Code.V8INTERNAL
|
+ else:
|
+ self.codetype = Code.UNKNOWN
|
|
def AddName(self, name):
|
assert self.name != name
|
@@ -185,7 +207,7 @@ class Code(object):
|
class CodePage(object):
|
"""Group of adjacent code objects."""
|
|
- SHIFT = 12 # 4K pages
|
+ SHIFT = 20 # 1M pages
|
SIZE = (1 << SHIFT)
|
MASK = ~(SIZE - 1)
|
|
@@ -507,6 +529,7 @@ class Descriptor(object):
|
# for the gory details.
|
|
|
+# Reference: struct perf_file_header in kernel/tools/perf/util/header.h
|
TRACE_HEADER_DESC = Descriptor([
|
("magic", "u64"),
|
("size", "u64"),
|
@@ -520,6 +543,7 @@ TRACE_HEADER_DESC = Descriptor([
|
])
|
|
|
+# Reference: /usr/include/linux/perf_event.h
|
PERF_EVENT_ATTR_DESC = Descriptor([
|
("type", "u32"),
|
("size", "u32"),
|
@@ -529,12 +553,13 @@ PERF_EVENT_ATTR_DESC = Descriptor([
|
("read_format", "u64"),
|
("flags", "u64"),
|
("wakeup_events_or_watermark", "u32"),
|
- ("bt_type", "u32"),
|
+ ("bp_type", "u32"),
|
("bp_addr", "u64"),
|
- ("bp_len", "u64"),
|
+ ("bp_len", "u64")
|
])
|
|
|
+# Reference: /usr/include/linux/perf_event.h
|
PERF_EVENT_HEADER_DESC = Descriptor([
|
("type", "u32"),
|
("misc", "u16"),
|
@@ -542,6 +567,7 @@ PERF_EVENT_HEADER_DESC = Descriptor([
|
])
|
|
|
+# Reference: kernel/events/core.c
|
PERF_MMAP_EVENT_BODY_DESC = Descriptor([
|
("pid", "u32"),
|
("tid", "u32"),
|
@@ -566,6 +592,7 @@ PERF_SAMPLE_STREAM_ID = 1 << 9
|
PERF_SAMPLE_RAW = 1 << 10
|
|
|
+# Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
|
PERF_SAMPLE_EVENT_BODY_FIELDS = [
|
("ip", "u64", PERF_SAMPLE_IP),
|
("pid", "u32", PERF_SAMPLE_TID),
|
@@ -702,8 +729,12 @@ class LibraryRepo(object):
|
# Unfortunately, section headers span two lines, so we have to
|
# keep the just seen section name (from the first line in each
|
# section header) in the after_section variable.
|
+ if mmap_info.filename.endswith(".ko"):
|
+ dynamic_symbols = ""
|
+ else:
|
+ dynamic_symbols = "-T"
|
process = subprocess.Popen(
|
- "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
|
+ "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
|
shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
pipe = process.stdout
|
after_section = None
|
@@ -795,7 +826,7 @@ def PrintReport(code_map, library_repo, arch, ticks, options):
|
code.PrintAnnotated(arch, options)
|
print
|
print "Ticks per library:"
|
- mmap_infos = [m for m in library_repo.infos]
|
+ mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
|
mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
|
for mmap_info in mmap_infos:
|
mmap_ticks = mmap_info.ticks
|
@@ -885,6 +916,9 @@ if __name__ == "__main__":
|
ticks = 0
|
missed_ticks = 0
|
really_missed_ticks = 0
|
+ optimized_ticks = 0
|
+ generated_ticks = 0
|
+ v8_internal_ticks = 0
|
mmap_time = 0
|
sample_time = 0
|
|
@@ -928,6 +962,12 @@ if __name__ == "__main__":
|
code = code_map.Find(sample.ip)
|
if code:
|
code.Tick(sample.ip)
|
+ if code.codetype == Code.OPTIMIZED:
|
+ optimized_ticks += 1
|
+ elif code.codetype == Code.FULL_CODEGEN:
|
+ generated_ticks += 1
|
+ elif code.codetype == Code.V8INTERNAL:
|
+ v8_internal_ticks += 1
|
else:
|
missed_ticks += 1
|
if not library_repo.Tick(sample.ip) and not code:
|
@@ -947,12 +987,21 @@ if __name__ == "__main__":
|
PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
|
|
if not options.quiet:
|
+ def PrintTicks(number, total, description):
|
+ print("%10d %5.1f%% ticks in %s" %
|
+ (number, 100.0*number/total, description))
|
print
|
print "Stats:"
|
print "%10d total trace events" % events
|
print "%10d total ticks" % ticks
|
print "%10d ticks not in symbols" % missed_ticks
|
- print "%10d unaccounted ticks" % really_missed_ticks
|
+ unaccounted = "unaccounted ticks"
|
+ if really_missed_ticks > 0:
|
+ unaccounted += " (probably in the kernel, try --kernel)"
|
+ PrintTicks(really_missed_ticks, ticks, unaccounted)
|
+ PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
|
+ PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
|
+ PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
|
print "%10d total symbols" % len([c for c in code_map.AllCode()])
|
print "%10d used symbols" % len([c for c in code_map.UsedCode()])
|
print "%9.2fs library processing time" % mmap_time
|
|