Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2938)

Unified Diff: base/trace_event/heap_profiler_heap_dump_writer.cc

Issue 1494293005: [Tracing] Enable heap dumps with both type info and backtraces (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@sfdedup-iter
Patch Set: 32-bit is still a thing Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/trace_event/heap_profiler_heap_dump_writer.cc
diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc
index b0ed86f32326d0ec66be40b9eca8c1f1c1f2766c..2a629d45efced6c89944ec4262602481d38f46e2 100644
--- a/base/trace_event/heap_profiler_heap_dump_writer.cc
+++ b/base/trace_event/heap_profiler_heap_dump_writer.cc
@@ -6,44 +6,197 @@
#include <algorithm>
#include <iterator>
-#include <utility>
+#include <tuple>
#include <vector>
+#include "base/containers/hash_tables.h"
#include "base/format_macros.h"
+#include "base/logging.h"
#include "base/strings/stringprintf.h"
+#include "base/trace_event/heap_profiler_allocation_context.h"
#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
#include "base/trace_event/heap_profiler_type_name_deduplicator.h"
#include "base/trace_event/trace_event_argument.h"
+// Most of what the |HeapDumpWriter| does is aggregating the detailed
petrcermak 2015/12/04 19:13:20 nit: I don't think there should be a definite arti
Ruud van Asseldonk 2015/12/07 13:53:59 As you wish.
+// information about the heap and deciding what to dump. Input to this
+// process is a list of (AllocationContext, size). The context has several
petrcermak 2015/12/04 19:13:20 supernit: s/several/two/ (I'd expect "several" to
petrcermak 2015/12/04 19:13:20 nit: I'd say "tuple" before the full stop.
Ruud van Asseldonk 2015/12/07 13:53:59 Yes only two is a bit of an anticlimax here after
+// properties:
+//
+// * A backtrace (list of strings).
+// * A type name (string).
+//
+// For uniformity, type name is also treated as a list (with one element). A
+// list is a path down a tree. For backtrace, this is the call tree. The tree
+// for type name has height one. There is one tree for every property, but
+// nodes of the trees can be shared among the trees. For instance, starting
+// from the root node, following the "BrowserMain" edge breaking down by
+// backtrace, and then following the "T" edge breaking down by type name, will
+// yield the same node as following the "T" edge from the root, and then
+// following the "BrowserMain" edge breaking down by backtrace. The size of
+// this node is the number of bytes allocated for type "T" when "BrowserMain"
+// was at the bottom of the call stack.
+//
+// The (AllocationContext, size) pairs are converted into |Part|s and grouped
+// into |Bucket|s. Parts are mutually exclusive parts of the heap. A bucket
+// is a group of parts of which the properties share a prefix.
petrcermak 2015/12/04 19:13:20 This is quite cryptic. Could you please provide an
petrcermak 2015/12/04 19:13:20 nit: s/of which the/whose/
Ruud van Asseldonk 2015/12/07 13:53:59 I tried an example, but even a very minimal exampl
+
namespace base {
namespace trace_event {
+using Entry = HeapDumpWriter::Entry;
+
namespace {
-template <typename T>
-bool PairSizeGt(const std::pair<T, size_t>& lhs,
- const std::pair<T, size_t>& rhs) {
- return lhs.second > rhs.second;
+// References an array of |const char*|s in two ranges: [begin, cursor), and
+// [cursor, end).
+struct Slice {
+ // Initializes the slice with the cursor at the first element.
+ Slice(const char* const* begin, const char* const* end)
+ : begin(begin), cursor(begin), end(end) {}
+
+ Slice(const char* const* begin,
+ const char* const* cursor,
+ const char* const* end)
+ : begin(begin), cursor(cursor), end(end) {}
+
+ // Returns a slice with cursor advanced by one.
+ Slice next() const { return Slice(begin, cursor + 1, end); }
+
+ // Pointer into const array (array cannot be modified through this struct) of
+ // |const char*| (immutable string). |begin| points at the first element,
+ // |end| points past the last element.
+ const char* const* begin;
+ const char* const* cursor;
+ const char* const* end;
+};
+
+struct Part;
+
+// Pointer to member of |Part| that has type |Slice|.
petrcermak 2015/12/04 19:13:20 nit: "a member"
Ruud van Asseldonk 2015/12/07 13:53:59 Done.
+using Property = Slice Part::*;
+
+// Helper for breaking down the heap. The slices reference an allocation
petrcermak 2015/12/04 19:13:20 Do multiple slices correspond to one allocation co
Ruud van Asseldonk 2015/12/07 13:53:59 Done.
+// context via the |begin| and |end| pointers. |size| is the number of bytes
+// allocated for this context. The |cursor| of the slices indicates the current
petrcermak 2015/12/04 19:13:20 If the previous comment applies, go to page 200 ..
Ruud van Asseldonk 2015/12/07 13:53:59 Done.
+// position in the tree when breaking down. The branches taken are [begin,
petrcermak 2015/12/04 19:13:20 "branches taken" is difficult to understand. What
Ruud van Asseldonk 2015/12/07 13:53:59 Done.
+// cursor). If |cursor| is not equal to |end|, it means that further breakdown
+// by that property is possible.
+struct Part {
+ Part(Slice backtrace, Slice type_name, size_t size)
+ : backtrace(backtrace), type_name(type_name), size(size) {}
+
+ // Constructs a part referencing an |AllocationContext|. Cursors are set to
+ // the beginning.
+ Part(const AllocationContext& ctx, size_t size)
+ : backtrace(std::begin(ctx.backtrace.frames),
+ std::end(ctx.backtrace.frames)),
+ type_name(&ctx.type_name, &ctx.type_name + 1),
+ size(size) {
+ // The backtrace of the allocation context is padded with null pointers,
+ // but for breaking down the heap this padding should not be taken into
+ // account, so adjust the end pointer to point past the last non-null frame.
+ while (backtrace.begin != backtrace.end && *(backtrace.end - 1) == nullptr)
+ backtrace.end--;
+ }
+
+ // Returns a copy of this part, with the cursor of the member pointed at
+ // by |property| advanced by one.
+ Part advance(Property property) const {
+ return Part(property == &Part::backtrace ? backtrace.next() : backtrace,
petrcermak 2015/12/04 19:13:20 Thought: Is it worth keeping copying backtrace/typ
Ruud van Asseldonk 2015/12/07 13:53:59 I did not measure, but I think it is. Instead of u
+ property == &Part::type_name ? type_name.next() : type_name,
+ size);
+ }
+
+ Slice backtrace;
+ Slice type_name;
+ size_t size;
+};
+
+// Group of parts where the [begin, cursor) range is equal for all properties
+// of all parts in the bucket.
+struct Bucket {
+ Bucket() : size(0) {}
+
+ std::vector<Part> parts;
+
+ // The sum of the sizes of the parts.
+ size_t size;
+};
+
+bool operator<(const Bucket& lhs, const Bucket& rhs) {
+ return lhs.size < rhs.size;
+}
+
+// Groups the list of heap parts by the value under the cursor of the member
+// pointed at by |groupBy|. The parts in the returned list will have the cursor
+// advanced for the member pointed at by |groupBy|.
+std::vector<Bucket> GroupBy(const std::vector<Part>& parts, Property groupBy) {
+ base::hash_map<const char*, Bucket> grouped;
+
+ for (const Part& part : parts) {
+ Slice property = part.*groupBy;
+ if (property.cursor != property.end) {
+ Bucket& bucket = grouped[*(property.cursor)];
+ bucket.size += part.size;
+ bucket.parts.push_back(part.advance(groupBy));
+ DCHECK_GT(bucket.size, 0u);
+ }
+ }
+
+ std::vector<Bucket> buckets;
+ buckets.reserve(grouped.size());
+ for (auto key_bucket : grouped)
+ buckets.push_back(key_bucket.second);
+
+ return buckets;
petrcermak 2015/12/04 19:13:20 Are move semantics already allowed in Chrome? If t
Ruud van Asseldonk 2015/12/07 13:53:59 Even before C++11, compilers would do Return Value
}
-// Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is
-// ordered from high |size_t| to low |size_t|.
-template <typename T>
-std::vector<std::pair<T, size_t>> SortBySizeDescending(
- const hash_map<T, size_t>& grouped) {
- std::vector<std::pair<T, size_t>> sorted;
- sorted.reserve(grouped.size());
- std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted));
- std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>);
- return sorted;
+// Breaks down the bucket by the member pointed at by |property|. Returns only
+// buckets that contribute significantly to the total size. The long tail is
+// omitted.
+std::vector<Bucket> BreakDownBy(const Bucket& bucket, Property property) {
+ std::vector<Bucket> buckets = GroupBy(bucket.parts, property);
+
+ // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
+ // so its front contains the largest bucket. Buckets should be iterated
+ // ordered by size, but sorting the vector is overkill because the long tail
+ // of small buckets will be discarded. By using a max-heap, the optimal case
+ // where all but the first bucket are discarded is O(n). The worst case where
+ // no bucket is discarded is doing a heap sort, which is O(n log n).
+ std::make_heap(buckets.begin(), buckets.end());
+
+ // Keep including parts until adding a part would increase the number of
+ // bytes accounted for by less than a percent. The large buckets end up in
+ // [it, end()), [begin(), it) is the part that contains the max-heap of small
+ // buckets. TODO(ruuda): tweak the heuristic.
+ size_t accounted_for = 0;
+ std::vector<Bucket>::iterator it;
+ for (it = buckets.end(); it != buckets.begin(); --it) {
+ // Compute contribution to number of bytes accounted for in percent, rounded
+ // down due to integer division. Buckets are iterated by descending size,
+ // so later buckets cannot have a larger contribution than this one.
+ accounted_for += buckets.front().size;
+ size_t contribution = buckets.front().size * 100 / accounted_for;
+ if (contribution == 0)
+ break;
+
+ // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
+ // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
+ std::pop_heap(buckets.begin(), it);
+ }
+
+ // Discard the long tail of buckets that contribute less than a percent.
+ buckets.erase(buckets.begin(), it);
+
+ return buckets;
petrcermak 2015/12/04 19:13:20 ditto
Ruud van Asseldonk 2015/12/07 13:53:59 See above.
}
} // namespace
HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
TypeNameDeduplicator* type_name_deduplicator)
- : traced_value_(new TracedValue()),
- stack_frame_deduplicator_(stack_frame_deduplicator),
+ : stack_frame_deduplicator_(stack_frame_deduplicator),
type_name_deduplicator_(type_name_deduplicator) {}
HeapDumpWriter::~HeapDumpWriter() {}
@@ -53,81 +206,113 @@ void HeapDumpWriter::InsertAllocation(const AllocationContext& context,
bytes_by_context_[context] += size;
}
-scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() {
- // Group by backtrace and by type ID, and compute the total heap size while
- // iterating anyway.
- size_t total_size = 0;
- hash_map<Backtrace, size_t> bytes_by_backtrace;
- hash_map<const char*, size_t> bytes_by_type;
-
- for (auto context_size : bytes_by_context_) {
- total_size += context_size.second;
- bytes_by_backtrace[context_size.first.backtrace] += context_size.second;
- bytes_by_type[context_size.first.type_name] += context_size.second;
- }
+bool HeapDumpWriter::InsertEntry(const Bucket& bucket) {
+ // The parts in the bucket are all different, but for all properties, the
+ // [begin, cursor) range is equal for all parts in the bucket.
+ DCHECK(!bucket.parts.empty());
+ Slice backtrace = bucket.parts.front().backtrace;
+ Slice type_name = bucket.parts.front().type_name;
- auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace);
- auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type);
+ Entry entry;
+ entry.stack_frame_id =
+ stack_frame_deduplicator_->Insert(backtrace.begin, backtrace.cursor);
- traced_value_->BeginArray("entries");
+ // If the type name is not set use ID -1, otherwise deduplicate the type name.
petrcermak 2015/12/04 19:13:20 supernit: I'd say "ID = -1" (multiple uses in this
Ruud van Asseldonk 2015/12/07 13:53:59 Ignored.
+ entry.type_id = (type_name.begin == type_name.cursor)
+ ? -1
+ : type_name_deduplicator_->Insert(*type_name.begin);
- // The global size, no column specified.
- {
- traced_value_->BeginDictionary();
- WriteSize(total_size);
- traced_value_->EndDictionary();
- }
+ entry.size = bucket.size;
- // Entries with the size per backtrace.
- for (const auto& entry : sorted_bytes_by_backtrace) {
- traced_value_->BeginDictionary();
- // Insert a forward reference to the backtrace that will be written to the
- // |stackFrames| dictionary later on.
- int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames),
- std::end(entry.first.frames));
- WriteStackFrameIndex(idx);
- WriteSize(entry.second);
- traced_value_->EndDictionary();
- }
+ auto position_and_inserted = entries_.insert(entry);
+ return position_and_inserted.second;
+}
- // Entries with the size per type.
- for (const auto& entry : sorted_bytes_by_type) {
- traced_value_->BeginDictionary();
- // Insert a forward reference to the type name that will be written to the
- // trace when it is flushed.
- WriteTypeId(type_name_deduplicator_->Insert(entry.first));
- WriteSize(entry.second);
- traced_value_->EndDictionary();
- }
+void HeapDumpWriter::BreakDown(const Bucket& bucket) {
+ std::vector<Bucket> by_backtrace = BreakDownBy(bucket, &Part::backtrace);
+ std::vector<Bucket> by_type_name = BreakDownBy(bucket, &Part::type_name);
- traced_value_->EndArray(); // "entries"
+ // Insert entries for the buckets. If a bucket was not present before, it has
+ // not been broken down before, so recursively continue breaking down in that
+ // case. There might be multiple routes to the same entry (first break down
+ // by type name, then by backtrace, or first by backtrace and then by type),
+ // so a set is used to avoid dumping and breaking down entries more than once.
- return traced_value_;
+ for (const Bucket& subbucket : by_backtrace)
+ if (InsertEntry(subbucket))
+ BreakDown(subbucket);
+
+ for (const Bucket& subbucket : by_type_name)
+ if (InsertEntry(subbucket))
+ BreakDown(subbucket);
}
-void HeapDumpWriter::WriteStackFrameIndex(int index) {
- if (index == -1) {
- // An empty backtrace (which will have index -1) is represented by the empty
- // string, because there is no leaf frame to reference in |stackFrames|.
- traced_value_->SetString("bt", "");
- } else {
- // Format index of the leaf frame as a string, because |stackFrames| is a
- // dictionary, not an array.
- SStringPrintf(&buffer_, "%i", index);
- traced_value_->SetString("bt", buffer_);
+const std::set<Entry>& HeapDumpWriter::Dump() {
+ // Start with one bucket that represents the entire heap. Iterate by
+ // reference, because the heap parts are going to point into allocation
+ // contexts stored in |bytes_by_context_|.
+ Bucket root_bucket;
+ for (const auto& context_and_size : bytes_by_context_) {
+ const AllocationContext& context = context_and_size.first;
+ const size_t size = context_and_size.second;
+ root_bucket.parts.push_back(Part(context, size));
+ root_bucket.size += size;
}
+
+ InsertEntry(root_bucket);
+
+ // Recursively break down the heap and fill |entries_| with entries to dump.
+ BreakDown(root_bucket);
+
+ return entries_;
}
-void HeapDumpWriter::WriteTypeId(int type_id) {
- // Format the type ID as a string.
- SStringPrintf(&buffer_, "%i", type_id);
- traced_value_->SetString("type", buffer_);
+// static
+scoped_refptr<TracedValue> HeapDumpWriter::Write(
+ const std::set<Entry>& entries) {
+ std::string buffer;
+ scoped_refptr<TracedValue> traced_value = new TracedValue;
+
+ traced_value->BeginArray("entries");
+
+ for (const Entry& entry : entries) {
+ traced_value->BeginDictionary();
+
+ // Format size as hexadecimal string into |buffer|.
+ SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
+ traced_value->SetString("size", buffer);
+
+ if (entry.stack_frame_id == -1) {
+ // An empty backtrace (which will have ID -1) is represented by the empty
+ // string, because there is no leaf frame to reference in |stackFrames|.
+ traced_value->SetString("bt", "");
+ } else {
+ // Format index of the leaf frame as a string, because |stackFrames| is a
+ // dictionary, not an array.
+ SStringPrintf(&buffer, "%i", entry.stack_frame_id);
+ traced_value->SetString("bt", buffer);
+ }
+
+ // Type ID -1 (cumulative size for all types) is represented by the absence
+ // of the "type" key in the dictionary.
+ if (entry.type_id != -1) {
+ // Format the type ID as a string.
+ SStringPrintf(&buffer, "%i", entry.type_id);
+ traced_value->SetString("type", buffer);
+ }
+
+ traced_value->EndDictionary();
+ }
+
+ traced_value->EndArray(); // "entries"
+ return traced_value;
}
-void HeapDumpWriter::WriteSize(size_t size) {
- // Format size as hexadecimal string into |buffer_|.
- SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size));
- traced_value_->SetString("size", buffer_);
+bool operator<(Entry lhs, Entry rhs) {
+ // There is no need to compare |size|. If the backtrace and type name are
+ // equal then the sizes must be equal as well.
+ return std::tie(lhs.stack_frame_id, lhs.type_id) <
+ std::tie(rhs.stack_frame_id, rhs.type_id);
}
} // namespace trace_event

Powered by Google App Engine
This is Rietveld 408576698