| Index: base/trace_event/heap_profiler_heap_dump_writer.cc
|
| diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc
|
| index b0ed86f32326d0ec66be40b9eca8c1f1c1f2766c..e37c540cc22fd78119ffdc3d3293ff9e1245cd0e 100644
|
| --- a/base/trace_event/heap_profiler_heap_dump_writer.cc
|
| +++ b/base/trace_event/heap_profiler_heap_dump_writer.cc
|
| @@ -6,128 +6,291 @@
|
|
|
| #include <algorithm>
|
| #include <iterator>
|
| +#include <tuple>
|
| #include <utility>
|
| #include <vector>
|
|
|
| #include "base/format_macros.h"
|
| +#include "base/logging.h"
|
| #include "base/strings/stringprintf.h"
|
| #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
|
| #include "base/trace_event/heap_profiler_type_name_deduplicator.h"
|
| #include "base/trace_event/trace_event_argument.h"
|
|
|
| +// Most of what the |HeapDumpWriter| does is aggregating detailed information
|
| +// about the heap and deciding what to dump. The input to this process is a
|
| +// list of (|AllocationContext|, size) pairs.
|
| +//
|
| +// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
|
| +// pairs where the properties of the contexts share a prefix. (Type name is
|
| +// considered a list of length one here.) First all pairs are put into one
|
| +// bucket that represents the entire heap. Then this bucket is recursively
|
| +// broken down into smaller buckets. Each bucket keeps track of whether further
|
| +// breakdown is possible.
|
| +
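|
| To make the bucketing concrete, here is a minimal standalone sketch of one
| breakdown step. It is not part of this patch: it uses std::map and
| std::string where the real code uses base::hash_map and interned
| const char* frames, but the grouping idea is the same.
|
|   #include <cstddef>
|   #include <map>
|   #include <string>
|   #include <vector>
|
|   // One (context, size) pair, reduced to the part that matters here.
|   struct Sample {
|     std::vector<std::string> backtrace;
|     std::size_t size;
|   };
|
|   // Group samples into subbuckets keyed by the frame at |cursor|. Samples
|   // whose backtrace ends before |cursor| cannot be broken down further.
|   std::map<std::string, std::vector<Sample>> BreakDownByFrame(
|       const std::vector<Sample>& samples, std::size_t cursor) {
|     std::map<std::string, std::vector<Sample>> subbuckets;
|     for (const Sample& s : samples) {
|       if (cursor < s.backtrace.size())
|         subbuckets[s.backtrace[cursor]].push_back(s);
|     }
|     return subbuckets;
|   }
|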
|
| namespace base {
|
| namespace trace_event {
|
| -
|
| +namespace internal {
|
| namespace {
|
|
|
| -template <typename T>
|
| -bool PairSizeGt(const std::pair<T, size_t>& lhs,
|
| - const std::pair<T, size_t>& rhs) {
|
| - return lhs.second > rhs.second;
|
| +// Denotes a property of |AllocationContext| to break down by.
|
| +enum class BreakDownMode { kByBacktrace, kByTypeName };
|
| +
|
| +// A group of bytes for which the context shares a prefix.
|
| +struct Bucket {
|
| + Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {}
|
| +
|
| + std::vector<std::pair<const AllocationContext*, size_t>> bytes_by_context;
|
| +
|
| + // The sum of the sizes of |bytes_by_context|.
|
| + size_t size;
|
| +
|
| + // The index of the first stack frame that has not yet been broken down
|
| + // by. For all elements in this bucket, stack frames 0 up to (but not
|
| + // including) the cursor must be equal.
|
| + size_t backtrace_cursor;
|
| +
|
| + // When true, the type name for all elements in this bucket must be equal.
|
| + bool is_broken_down_by_type_name;
|
| +};
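|
| For example, two contexts with backtraces [main, RenderFrame, Alloc] and
| [main, RenderFrame, Paint] can live in one bucket with backtrace_cursor == 2:
| frames 0 and 1 are equal for both, and frame 2 is where the next breakdown
| by backtrace would split them. (Frame names are made up for illustration.)
|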
|
| +
|
| +// Comparison operator to order buckets by their size.
|
| +bool operator<(const Bucket& lhs, const Bucket& rhs) {
|
| + return lhs.size < rhs.size;
|
| +}
|
| +
|
| +// Groups the allocations in the bucket by |breakBy|. The buckets in the
|
| +// returned list will have |backtrace_cursor| advanced or
|
| +// |is_broken_down_by_type_name| set depending on the property to group by.
|
| +std::vector<Bucket> GetSubbuckets(const Bucket& bucket, BreakDownMode breakBy) {
|
| + base::hash_map<const char*, Bucket> breakdown;
|
| +
|
| + if (breakBy == BreakDownMode::kByBacktrace) {
|
| + for (const auto& context_and_size : bucket.bytes_by_context) {
|
| + const Backtrace& backtrace = context_and_size.first->backtrace;
|
| + const char* const* begin = std::begin(backtrace.frames);
|
| + const char* const* end = std::end(backtrace.frames);
|
| + const char* const* cursor = begin + bucket.backtrace_cursor;
|
| +
|
| + // The backtrace in the context is padded with null pointers, but these
|
| + // should not be considered for breakdown. Adjust end to point past the
|
| + // last non-null frame.
|
| + while (begin != end && *(end - 1) == nullptr)
|
| + end--;
|
| +
|
| + DCHECK_LE(cursor, end);
|
| +
|
| + if (cursor != end) {
|
| + Bucket& subbucket = breakdown[*cursor];
|
| + subbucket.size += context_and_size.second;
|
| + subbucket.bytes_by_context.push_back(context_and_size);
|
| + subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
|
| + subbucket.is_broken_down_by_type_name =
|
| + bucket.is_broken_down_by_type_name;
|
| + DCHECK_GT(subbucket.size, 0u);
|
| + }
|
| + }
|
| + } else if (breakBy == BreakDownMode::kByTypeName) {
|
| + if (!bucket.is_broken_down_by_type_name) {
|
| + for (const auto& context_and_size : bucket.bytes_by_context) {
|
| + const AllocationContext* context = context_and_size.first;
|
| + Bucket& subbucket = breakdown[context->type_name];
|
| + subbucket.size += context_and_size.second;
|
| + subbucket.bytes_by_context.push_back(context_and_size);
|
| + subbucket.backtrace_cursor = bucket.backtrace_cursor;
|
| + subbucket.is_broken_down_by_type_name = true;
|
| + DCHECK_GT(subbucket.size, 0u);
|
| + }
|
| + }
|
| + }
|
| +
|
| + std::vector<Bucket> buckets;
|
| + buckets.reserve(breakdown.size());
|
| + for (const auto& key_bucket : breakdown)
|
| + buckets.push_back(key_bucket.second);
|
| +
|
| + return buckets;
|
| }
|
|
|
| -// Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is
|
| -// ordered from high |size_t| to low |size_t|.
|
| -template <typename T>
|
| -std::vector<std::pair<T, size_t>> SortBySizeDescending(
|
| - const hash_map<T, size_t>& grouped) {
|
| - std::vector<std::pair<T, size_t>> sorted;
|
| - sorted.reserve(grouped.size());
|
| - std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted));
|
| - std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>);
|
| - return sorted;
|
| +// Breaks down the bucket by |breakBy|. Returns only buckets that contribute
|
| +// significantly to the total size. The long tail is omitted.
|
| +std::vector<Bucket> BreakDownBy(const Bucket& bucket, BreakDownMode breakBy) {
|
| + std::vector<Bucket> buckets = GetSubbuckets(bucket, breakBy);
|
| +
|
| + // Ensure that |buckets| is a max-heap (the data structure, not the memory
|
| + // heap), so its front contains the largest bucket. Buckets should be
|
| + // visited in order of descending size, but fully sorting the vector is
|
| + // overkill because the long tail of small buckets will be discarded. With
|
| + // a max-heap, the best case, where all but the first bucket are discarded,
|
| + // is O(n). The worst case, where no bucket is discarded, amounts to a
|
| + // heap sort, which is O(n log n).
|
| + std::make_heap(buckets.begin(), buckets.end());
|
| +
|
| + // Keep including buckets until adding one would increase the number of
|
| + // bytes accounted for by less than one percent. The large buckets end up
|
| + // in [it, end()); [begin(), it) still contains the max-heap of the small
|
| + // buckets. TODO(ruuda): tweak the heuristic.
|
| + size_t accounted_for = 0;
|
| + std::vector<Bucket>::iterator it;
|
| + for (it = buckets.end(); it != buckets.begin(); --it) {
|
| + // Compute this bucket's contribution to the bytes accounted for so far,
|
| + // in percent, rounded down due to integer division. Buckets are visited
|
| + // in descending order of size, so no later bucket can have a larger
|
| + // contribution than this one.
|
| + accounted_for += buckets.front().size;
|
| + size_t contribution = buckets.front().size * 100 / accounted_for;
|
| + if (contribution == 0)
|
| + break;
|
| +
|
| + // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
|
| + // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
|
| + std::pop_heap(buckets.begin(), it);
|
| + }
|
| +
|
| + // At this point, |buckets| looks like this (numbers are bucket sizes):
|
| + //
|
| + //  <-- max-heap of small buckets --->
|
| + //                                    <-- large buckets by ascending size -->
|
| + // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
|
| + //   ^                                ^            ^
|
| + //   |                                |            |
|
| + //   begin()                          it           end()
|
| +
|
| + // Discard the long tail of buckets that contribute less than a percent.
|
| + buckets.erase(buckets.begin(), it);
|
| +
|
| + return buckets;
|
| }
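|
| The selection above can be exercised in isolation. A minimal standalone
| sketch (plain size_t values stand in for buckets; all sizes are assumed
| positive, which the real code enforces with DCHECKs):
|
|   #include <algorithm>
|   #include <cstddef>
|   #include <vector>
|
|   // Returns the sizes that survive the one-percent cutoff, in ascending
|   // order, mirroring the make_heap/pop_heap structure used above.
|   std::vector<std::size_t> KeepSignificant(std::vector<std::size_t> sizes) {
|     std::make_heap(sizes.begin(), sizes.end());
|     std::size_t accounted_for = 0;
|     auto it = sizes.end();
|     for (; it != sizes.begin(); --it) {
|       accounted_for += sizes.front();
|       // Stop once the largest remaining size rounds down to zero percent.
|       if (sizes.front() * 100 / accounted_for == 0)
|         break;
|       std::pop_heap(sizes.begin(), it);  // Moves the max to it - 1.
|     }
|     // KeepSignificant({97, 1, 89}) yields {89, 97}; the 1 contributes
|     // less than one percent and is discarded.
|     return std::vector<std::size_t>(it, sizes.end());
|   }
|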
|
|
|
| } // namespace
|
|
|
| +bool operator<(Entry lhs, Entry rhs) {
|
| + // There is no need to compare |size|. If the backtrace and type name are
|
| + // equal then the sizes must be equal as well.
|
| + return std::tie(lhs.stack_frame_id, lhs.type_id) <
|
| + std::tie(rhs.stack_frame_id, rhs.type_id);
|
| +}
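|
| For readers unfamiliar with the idiom: std::tie builds tuples of references,
| and tuple's operator< compares lexicographically, so stack_frame_id decides
| and type_id only breaks ties. A standalone illustration (the Id struct is
| hypothetical, not from this patch):
|
|   #include <cassert>
|   #include <tuple>
|
|   struct Id {
|     int frame;
|     int type;
|   };
|
|   // Lexicographic comparison: |frame| decides, |type| breaks ties.
|   bool Less(const Id& lhs, const Id& rhs) {
|     return std::tie(lhs.frame, lhs.type) < std::tie(rhs.frame, rhs.type);
|   }
|
|   int main() {
|     assert(Less({1, 9}, {2, 0}));   // frame decides; type is ignored
|     assert(Less({1, 1}, {1, 2}));   // frames tie; type decides
|     assert(!Less({1, 2}, {1, 2}));  // equal keys compare not-less
|     return 0;
|   }
|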
|
| +
|
| HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
|
| TypeNameDeduplicator* type_name_deduplicator)
|
| - : traced_value_(new TracedValue()),
|
| - stack_frame_deduplicator_(stack_frame_deduplicator),
|
| + : stack_frame_deduplicator_(stack_frame_deduplicator),
|
| type_name_deduplicator_(type_name_deduplicator) {}
|
|
|
| HeapDumpWriter::~HeapDumpWriter() {}
|
|
|
| -void HeapDumpWriter::InsertAllocation(const AllocationContext& context,
|
| - size_t size) {
|
| - bytes_by_context_[context] += size;
|
| +bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
|
| + // The contexts in the bucket are all different, but the [begin, cursor) range
|
| + // is equal for all contexts in the bucket, and the type names are the same if
|
| + // |is_broken_down_by_type_name| is set.
|
| + DCHECK(!bucket.bytes_by_context.empty());
|
| +
|
| + const AllocationContext* context = bucket.bytes_by_context.front().first;
|
| +
|
| + const char* const* backtrace_begin = std::begin(context->backtrace.frames);
|
| + const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
|
| + DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));
|
| +
|
| + Entry entry;
|
| + entry.stack_frame_id =
|
| + stack_frame_deduplicator_->Insert(backtrace_begin, backtrace_end);
|
| +
|
| + // Deduplicate the type name, or use ID -1 if this bucket has not been
|
| + // broken down by type name.
|
| + entry.type_id = bucket.is_broken_down_by_type_name
|
| + ? type_name_deduplicator_->Insert(context->type_name)
|
| + : -1;
|
| +
|
| + entry.size = bucket.size;
|
| +
|
| + auto position_and_inserted = entries_.insert(entry);
|
| + return position_and_inserted.second;
|
| }
|
|
|
| -scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() {
|
| - // Group by backtrace and by type ID, and compute the total heap size while
|
| - // iterating anyway.
|
| - size_t total_size = 0;
|
| - hash_map<Backtrace, size_t> bytes_by_backtrace;
|
| - hash_map<const char*, size_t> bytes_by_type;
|
| -
|
| - for (auto context_size : bytes_by_context_) {
|
| - total_size += context_size.second;
|
| - bytes_by_backtrace[context_size.first.backtrace] += context_size.second;
|
| - bytes_by_type[context_size.first.type_name] += context_size.second;
|
| - }
|
| +void HeapDumpWriter::BreakDown(const Bucket& bucket) {
|
| + auto by_backtrace = BreakDownBy(bucket, BreakDownMode::kByBacktrace);
|
| + auto by_type_name = BreakDownBy(bucket, BreakDownMode::kByTypeName);
|
|
|
| - auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace);
|
| - auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type);
|
| + // Insert an entry for each subbucket. If its entry was not present
|
| + // before, the subbucket has not been broken down before, so recursively
|
| + // continue breaking it down. There can be multiple routes to the same
|
| + // entry (break down first by type name and then by backtrace, or the
|
| + // other way around), so a set is used to avoid dumping and breaking down
|
| + // an entry more than once.
|
|
|
| - traced_value_->BeginArray("entries");
|
| + for (const Bucket& subbucket : by_backtrace)
|
| + if (AddEntryForBucket(subbucket))
|
| + BreakDown(subbucket);
|
|
|
| - // The global size, no column specified.
|
| - {
|
| - traced_value_->BeginDictionary();
|
| - WriteSize(total_size);
|
| - traced_value_->EndDictionary();
|
| - }
|
| + for (const Bucket& subbucket : by_type_name)
|
| + if (AddEntryForBucket(subbucket))
|
| + BreakDown(subbucket);
|
| +}
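|
| The deduplication hinges on std::set::insert reporting whether the element
| was new; only then does the recursion continue. A trimmed-down sketch of
| that pattern:
|
|   #include <iostream>
|   #include <set>
|   #include <utility>
|
|   int main() {
|     // Stands in for |entries_|: (stack_frame_id, type_id) pairs.
|     std::set<std::pair<int, int>> entries;
|
|     // First route (backtrace, then type) reaches the entry: it is new, so
|     // insert succeeds and the breakdown would recurse further.
|     std::cout << entries.insert({3, 7}).second << "\n";  // prints 1
|
|     // Second route (type, then backtrace) reaches the same entry: insert
|     // fails and the recursion stops, so nothing is dumped twice.
|     std::cout << entries.insert({3, 7}).second << "\n";  // prints 0
|     return 0;
|   }
|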
|
|
|
| - // Entries with the size per backtrace.
|
| - for (const auto& entry : sorted_bytes_by_backtrace) {
|
| - traced_value_->BeginDictionary();
|
| - // Insert a forward reference to the backtrace that will be written to the
|
| - // |stackFrames| dictionary later on.
|
| - int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames),
|
| - std::end(entry.first.frames));
|
| - WriteStackFrameIndex(idx);
|
| - WriteSize(entry.second);
|
| - traced_value_->EndDictionary();
|
| +const std::set<Entry>& HeapDumpWriter::Summarize(
|
| + const hash_map<AllocationContext, size_t>& bytes_by_context) {
|
| + // Start with one bucket that represents the entire heap. Iterate by
|
| + // reference, so that the stored pointers point at the allocation contexts
|
| + // inside |bytes_by_context| instead of at local copies.
|
| + Bucket root_bucket;
|
| + for (const auto& context_and_size : bytes_by_context) {
|
| + const AllocationContext* context = &context_and_size.first;
|
| + const size_t size = context_and_size.second;
|
| + root_bucket.bytes_by_context.push_back(std::make_pair(context, size));
|
| + root_bucket.size += size;
|
| }
|
|
|
| - // Entries with the size per type.
|
| - for (const auto& entry : sorted_bytes_by_type) {
|
| - traced_value_->BeginDictionary();
|
| - // Insert a forward reference to the type name that will be written to the
|
| - // trace when it is flushed.
|
| - WriteTypeId(type_name_deduplicator_->Insert(entry.first));
|
| - WriteSize(entry.second);
|
| - traced_value_->EndDictionary();
|
| - }
|
| + AddEntryForBucket(root_bucket);
|
|
|
| - traced_value_->EndArray(); // "entries"
|
| + // Recursively break down the heap and fill |entries_| with entries to dump.
|
| + BreakDown(root_bucket);
|
|
|
| - return traced_value_;
|
| + return entries_;
|
| }
|
|
|
| -void HeapDumpWriter::WriteStackFrameIndex(int index) {
|
| - if (index == -1) {
|
| - // An empty backtrace (which will have index -1) is represented by the empty
|
| - // string, because there is no leaf frame to reference in |stackFrames|.
|
| - traced_value_->SetString("bt", "");
|
| - } else {
|
| - // Format index of the leaf frame as a string, because |stackFrames| is a
|
| - // dictionary, not an array.
|
| - SStringPrintf(&buffer_, "%i", index);
|
| - traced_value_->SetString("bt", buffer_);
|
| +scoped_refptr<TracedValue> Serialize(const std::set<Entry>& entries) {
|
| + std::string buffer;
|
| + scoped_refptr<TracedValue> traced_value = new TracedValue;
|
| +
|
| + traced_value->BeginArray("entries");
|
| +
|
| + for (const Entry& entry : entries) {
|
| + traced_value->BeginDictionary();
|
| +
|
| + // Format size as hexadecimal string into |buffer|.
|
| + SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
|
| + traced_value->SetString("size", buffer);
|
| +
|
| + if (entry.stack_frame_id == -1) {
|
| + // An empty backtrace (which will have ID -1) is represented by the empty
|
| + // string, because there is no leaf frame to reference in |stackFrames|.
|
| + traced_value->SetString("bt", "");
|
| + } else {
|
| + // Format index of the leaf frame as a string, because |stackFrames| is a
|
| + // dictionary, not an array.
|
| + SStringPrintf(&buffer, "%i", entry.stack_frame_id);
|
| + traced_value->SetString("bt", buffer);
|
| + }
|
| +
|
| + // Type ID -1 (cumulative size for all types) is represented by the absence
|
| + // of the "type" key in the dictionary.
|
| + if (entry.type_id != -1) {
|
| + // Format the type ID as a string.
|
| + SStringPrintf(&buffer, "%i", entry.type_id);
|
| + traced_value->SetString("type", buffer);
|
| + }
|
| +
|
| + traced_value->EndDictionary();
|
| }
|
| -}
|
|
|
| -void HeapDumpWriter::WriteTypeId(int type_id) {
|
| - // Format the type ID as a string.
|
| - SStringPrintf(&buffer_, "%i", type_id);
|
| - traced_value_->SetString("type", buffer_);
|
| + traced_value->EndArray(); // "entries"
|
| + return traced_value;
|
| }
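|
| With illustrative values, the serialized array looks roughly like this in
| the final trace JSON (sizes are hexadecimal strings, IDs decimal strings;
| the entry with an empty "bt" and no "type" is the root bucket that covers
| the whole heap):
|
|   "entries": [
|     { "size": "8c0", "bt": "" },
|     { "size": "6e0", "bt": "5" },
|     { "size": "220", "bt": "5", "type": "12" }
|   ]
|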
|
|
|
| -void HeapDumpWriter::WriteSize(size_t size) {
|
| - // Format size as hexadecimal string into |buffer_|.
|
| - SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size));
|
| - traced_value_->SetString("size", buffer_);
|
| +} // namespace internal
|
| +
|
| +scoped_refptr<TracedValue> ExportHeapDump(
|
| + const hash_map<AllocationContext, size_t>& bytes_by_context,
|
| + StackFrameDeduplicator* stack_frame_deduplicator,
|
| + TypeNameDeduplicator* type_name_deduplicator) {
|
| + internal::HeapDumpWriter writer(stack_frame_deduplicator,
|
| + type_name_deduplicator);
|
| + return Serialize(writer.Summarize(bytes_by_context));
|
| }
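|
| A hypothetical call site (the surrounding names are assumptions, not part of
| this patch) would look roughly like:
|
|   // Sketch only: |bytes_by_context| is the map of live allocations, and
|   // the deduplicators must outlive the trace so that the emitted forward
|   // references into |stackFrames| and the type name dictionary stay valid.
|   scoped_refptr<TracedValue> heap_dump = ExportHeapDump(
|       bytes_by_context, stack_frame_deduplicator, type_name_deduplicator);
|   // |heap_dump| is then attached to the process memory dump under the
|   // allocator's name (e.g. "malloc").
|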
|
|
|
| } // namespace trace_event
|
|
|