Chromium Code Reviews| Index: base/trace_event/heap_profiler_heap_dump_writer.cc |
| diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc |
| index b0ed86f32326d0ec66be40b9eca8c1f1c1f2766c..da0fdfb0a616a16eda73d2352ea1e99b9ac71a25 100644 |
| --- a/base/trace_event/heap_profiler_heap_dump_writer.cc |
| +++ b/base/trace_event/heap_profiler_heap_dump_writer.cc |
| @@ -6,128 +6,304 @@ |
| #include <algorithm> |
| #include <iterator> |
| -#include <utility> |
| +#include <tuple> |
| #include <vector> |
| +#include "base/containers/hash_tables.h" |
| #include "base/format_macros.h" |
| +#include "base/logging.h" |
| +#include "base/macros.h" |
| #include "base/strings/stringprintf.h" |
| +#include "base/trace_event/heap_profiler_allocation_context.h" |
| #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" |
| #include "base/trace_event/heap_profiler_type_name_deduplicator.h" |
| #include "base/trace_event/trace_event_argument.h" |
| +// Most of what this file does is aggregating detailed information about the |
| +// heap and deciding what to dump. The input to this process is a list of |
|
petrcermak
2015/12/09 15:02:31
nit: I think there should be "The" in front of "In
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
|
| +// |AllocationContext|s and size pairs. |
| +// |
| +// The pairs are grouped into |Bucket|s. A bucket is a group of contexts with |
| +// sizes, where the properties of the contexts share a prefix. (Type name is considered a |
|
petrcermak
2015/12/09 15:02:31
hnit: It sounds like you're talking about "propert
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
|
| +// list of length one here.) First all pairs are put into one bucket that |
| +// represents the entire heap. Then this bucket is recursively broken down into |
| +// smaller buckets. Each bucket keeps track of whether further breakdown is |
| +// possible. |
| + |
| namespace base { |
| namespace trace_event { |
| +namespace heap_dump { |
| namespace { |
| -template <typename T> |
| -bool PairSizeGt(const std::pair<T, size_t>& lhs, |
| - const std::pair<T, size_t>& rhs) { |
| - return lhs.second > rhs.second; |
| +// Denotes a property of |AllocationContext|. |
| +enum class BreakDownMode { kByBacktrace, kByTypeName }; |
| + |
| +struct Bucket { |
| + Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {} |
| + |
| + // List of context and number of bytes allocated for that context. |
| + std::vector<std::pair<const AllocationContext*, size_t>> allocations; |
| + |
| + // The sum of the sizes of the allocations. |
| + size_t size; |
| + |
| + // The index of the stack frame that has not yet been broken down by. For |
| + // all contexts in this bucket, the stack frames 0 up to (but not including) |
| + // the cursor, must be equal. |
| + size_t backtrace_cursor; |
| + |
| + // When true, the type name for all contexts in this bucket must be equal. |
| + bool is_broken_down_by_type_name; |
| +}; |
| + |
| +// Comparison operator used to order buckets by size so that buckets of |
| +// insignificant size might be discarded. |
| +bool operator<(const Bucket& lhs, const Bucket& rhs) { |
| + return lhs.size < rhs.size; |
| +} |
| + |
| +// Groups the allocations in the bucket by |breakDownBy|. The buckets in the |
| +// returned list will have |backtrace_cursor| advanced or |
| +// |is_broken_down_by_type_name| set depending the property to group by. |
|
petrcermak
2015/12/09 15:02:32
nit: "depending ON"
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
|
| +std::vector<Bucket> BreakDownBy(const Bucket& bucket, |
| + BreakDownMode breakDownBy) { |
| + base::hash_map<const char*, Bucket> breakdown; |
| + |
| + if (breakDownBy == BreakDownMode::kByBacktrace) { |
|
petrcermak
2015/12/09 15:02:32
Wouldn't a switch make more sense here?
Ruud van Asseldonk
2015/12/09 16:16:01
I don’t have a strong opinion. I find a switch to
|
| + for (const auto& context_and_size : bucket.allocations) { |
| + const AllocationContext* context = context_and_size.first; |
|
petrcermak
2015/12/09 15:02:31
You always use |context| to get the backtrace so y
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
|
| + const char* const* begin = std::begin(context->backtrace.frames); |
| + const char* const* end = std::end(context->backtrace.frames); |
| + const char* const* cursor = begin + bucket.backtrace_cursor; |
| + |
| + // The backtrace in the context is padded with null pointers, but these |
| + // should not be considered for breakdown. Adjust end to point past the |
| + // last non-null frame. |
| + while (begin != end && *(end - 1) == nullptr) |
| + end--; |
| + |
| + DCHECK_LE(cursor, end); |
| + |
| + if (cursor != end) { |
| + Bucket& subbucket = breakdown[*cursor]; |
| + subbucket.size += context_and_size.second; |
| + subbucket.allocations.push_back(context_and_size); |
| + subbucket.backtrace_cursor = bucket.backtrace_cursor + 1; |
| + subbucket.is_broken_down_by_type_name = |
| + bucket.is_broken_down_by_type_name; |
| + DCHECK_GT(subbucket.size, 0u); |
| + } |
| + } |
| + } else if (breakDownBy == BreakDownMode::kByTypeName) { |
| + if (!bucket.is_broken_down_by_type_name) { |
| + for (const auto& context_and_size : bucket.allocations) { |
| + Bucket& subbucket = breakdown[context_and_size.first->type_name]; |
| + subbucket.size += context_and_size.second; |
| + subbucket.allocations.push_back(context_and_size); |
| + subbucket.backtrace_cursor = bucket.backtrace_cursor; |
| + subbucket.is_broken_down_by_type_name = true; |
| + DCHECK_GT(subbucket.size, 0u); |
| + } |
| + } |
| + } |
| + |
| + std::vector<Bucket> buckets; |
| + buckets.reserve(breakdown.size()); |
| + for (auto key_bucket : breakdown) |
| + buckets.push_back(key_bucket.second); |
| + |
| + return buckets; |
| +} |
| + |
| +// Breaks down the bucket by |property|. Returns only buckets that contribute |
| +// significantly to the total size. The long tail is omitted. |
| +std::vector<Bucket> DiscardSmallBuckets(std::vector<Bucket> buckets) { |
| + // Ensure that |buckets| is a max-heap (the data structure, not memory heap), |
| + // so its front contains the largest bucket. Buckets should be iterated |
| + // ordered by size, but sorting the vector is overkill because the long tail |
| + // of small buckets will be discarded. By using a max-heap, the optimal case |
| + // where all but the first bucket are discarded is O(n). The worst case where |
| + // no bucket is discarded is doing a heap sort, which is O(n log n). |
| + std::make_heap(buckets.begin(), buckets.end()); |
| + |
| + // Keep including allocations until adding one would increase the number of |
| + // bytes accounted for by less than a percent. The large buckets end up in |
| + // [it, end()), [begin(), it) is the part that contains the max-heap of small |
| + // buckets. TODO(ruuda): tweak the heuristic. |
| + size_t accounted_for = 0; |
| + std::vector<Bucket>::iterator it; |
| + for (it = buckets.end(); it != buckets.begin(); --it) { |
| + // Compute contribution to number of bytes accounted for in percent, rounded |
| + // down due to integer division. Buckets are iterated by descending size, |
| + // so later buckets cannot have a larger contribution than this one. |
| + accounted_for += buckets.front().size; |
| + size_t contribution = buckets.front().size * 100 / accounted_for; |
| + if (contribution == 0) |
| + break; |
| + |
| + // Put the largest bucket in [begin, it) at |it - 1| and max-heapify |
| + // [begin, it - 1). This puts the next largest bucket at |buckets.front()|. |
| + std::pop_heap(buckets.begin(), it); |
| + } |
| + |
| + // At this point, |buckets| looks like this (numbers are bucket sizes): |
| + // |
| + // <-- max-heap of small buckets ---> |
| + // <-- large buckets by descending size --> |
| + // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ] |
| + // ^ ^ ^ |
| + // | | | |
| + // begin() it end() |
| + |
| + // Discard the long tail of buckets that contribute less than a percent. |
| + buckets.erase(buckets.begin(), it); |
| + |
| + return buckets; |
| +} |
| + |
| +// Inserts an entry for |bucket| into |entries|. |
| +bool InsertEntry(const Bucket& bucket, |
| + std::set<HeapEntry>* entries, |
| + StackFrameDeduplicator* stack_frame_deduplicator, |
| + TypeNameDeduplicator* type_name_deduplicator) { |
| + DCHECK(!bucket.allocations.empty()); |
| + |
| + // The contexts in the bucket are all different, but for all properties, the |
| + // [begin, cursor) range is equal for all contexts in the bucket. |
|
petrcermak
2015/12/09 15:02:32
This comment is outdated - it doesn't capture the
Ruud van Asseldonk
2015/12/09 16:16:01
Fixed.
|
| + const AllocationContext& context = *bucket.allocations.front().first; |
| + |
| + const char* const* backtrace_begin = std::begin(context.backtrace.frames); |
| + const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor; |
| + DCHECK_LE(bucket.backtrace_cursor, arraysize(context.backtrace.frames)); |
| + |
| + HeapEntry entry; |
| + entry.stack_frame_id = |
| + stack_frame_deduplicator->Insert(backtrace_begin, backtrace_end); |
| + |
| + // Deduplicate the type name, or use ID -1 if type name is not set. |
| + entry.type_id = bucket.is_broken_down_by_type_name |
| + ? type_name_deduplicator->Insert(context.type_name) |
| + : -1; |
| + |
| + entry.size = bucket.size; |
| + |
| + auto position_and_inserted = entries->insert(entry); |
| + return position_and_inserted.second; |
| } |
| -// Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is |
| -// ordered from high |size_t| to low |size_t|. |
| -template <typename T> |
| -std::vector<std::pair<T, size_t>> SortBySizeDescending( |
| - const hash_map<T, size_t>& grouped) { |
| - std::vector<std::pair<T, size_t>> sorted; |
| - sorted.reserve(grouped.size()); |
| - std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted)); |
| - std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>); |
| - return sorted; |
| +// Recursively breaks down |bucket| into smaller buckets, and adds the buckets |
| +// of significant size to |entries|. |
| +void BreakDown(const Bucket& bucket, |
| + std::set<HeapEntry>* entries, |
| + StackFrameDeduplicator* stack_frame_deduplicator, |
| + TypeNameDeduplicator* type_name_deduplicator) { |
| + std::vector<Bucket> by_backtrace = |
| + DiscardSmallBuckets(BreakDownBy(bucket, BreakDownMode::kByBacktrace)); |
| + std::vector<Bucket> by_type_name = |
| + DiscardSmallBuckets(BreakDownBy(bucket, BreakDownMode::kByTypeName)); |
| + |
| + // Insert entries for the buckets. If a bucket was not present before, it has |
| + // not been broken down before, so recursively continue breaking down in that |
| + // case. There might be multiple routes to the same entry (first break down |
| + // by type name, then by backtrace, or first by backtrace and then by type), |
| + // so a set is used to avoid dumping and breaking down entries more than once. |
| + |
| + for (const Bucket& subbucket : by_backtrace) |
| + if (InsertEntry(subbucket, entries, stack_frame_deduplicator, |
| + type_name_deduplicator)) |
| + BreakDown(subbucket, entries, stack_frame_deduplicator, |
| + type_name_deduplicator); |
| + |
| + for (const Bucket& subbucket : by_type_name) |
| + if (InsertEntry(subbucket, entries, stack_frame_deduplicator, |
| + type_name_deduplicator)) |
| + BreakDown(subbucket, entries, stack_frame_deduplicator, |
| + type_name_deduplicator); |
| } |
| } // namespace |
| -HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, |
| - TypeNameDeduplicator* type_name_deduplicator) |
| - : traced_value_(new TracedValue()), |
| - stack_frame_deduplicator_(stack_frame_deduplicator), |
| - type_name_deduplicator_(type_name_deduplicator) {} |
| - |
| -HeapDumpWriter::~HeapDumpWriter() {} |
| - |
| -void HeapDumpWriter::InsertAllocation(const AllocationContext& context, |
| - size_t size) { |
| - bytes_by_context_[context] += size; |
| +bool operator<(HeapEntry lhs, HeapEntry rhs) { |
| + // There is no need to compare |size|. If the backtrace and type name are |
| + // equal then the sizes must be equal as well. |
| + return std::tie(lhs.stack_frame_id, lhs.type_id) < |
| + std::tie(rhs.stack_frame_id, rhs.type_id); |
| } |
| -scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() { |
| - // Group by backtrace and by type ID, and compute the total heap size while |
| - // iterating anyway. |
| - size_t total_size = 0; |
| - hash_map<Backtrace, size_t> bytes_by_backtrace; |
| - hash_map<const char*, size_t> bytes_by_type; |
| - |
| - for (auto context_size : bytes_by_context_) { |
| - total_size += context_size.second; |
| - bytes_by_backtrace[context_size.first.backtrace] += context_size.second; |
| - bytes_by_type[context_size.first.type_name] += context_size.second; |
| +std::set<HeapEntry> Dump( |
| + const base::hash_map<AllocationContext, size_t>& allocations, |
| + StackFrameDeduplicator* stack_frame_deduplicator, |
| + TypeNameDeduplicator* type_name_deduplicator) { |
| + // Start with one bucket that represents the entire heap. Iterate by |
| + // reference, because the pairs are going to point to allocation contexts |
| + // stored in |allocations|. |
| + Bucket root_bucket; |
| + for (const auto& context_and_size : allocations) { |
| + const AllocationContext* context = &context_and_size.first; |
| + const size_t size = context_and_size.second; |
| + root_bucket.allocations.push_back(std::make_pair(context, size)); |
| + root_bucket.size += size; |
| } |
| - auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace); |
| - auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type); |
| + std::set<HeapEntry> entries; |
| + InsertEntry(root_bucket, &entries, stack_frame_deduplicator, |
| + type_name_deduplicator); |
| - traced_value_->BeginArray("entries"); |
| + // Recursively break down the heap and fill |entries| with entries to dump. |
| + BreakDown(root_bucket, &entries, stack_frame_deduplicator, |
| + type_name_deduplicator); |
| - // The global size, no column specified. |
| - { |
| - traced_value_->BeginDictionary(); |
| - WriteSize(total_size); |
| - traced_value_->EndDictionary(); |
| - } |
| + return entries; |
| +} |
| - // Entries with the size per backtrace. |
| - for (const auto& entry : sorted_bytes_by_backtrace) { |
| - traced_value_->BeginDictionary(); |
| - // Insert a forward reference to the backtrace that will be written to the |
| - // |stackFrames| dictionary later on. |
| - int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames), |
| - std::end(entry.first.frames)); |
| - WriteStackFrameIndex(idx); |
| - WriteSize(entry.second); |
| - traced_value_->EndDictionary(); |
| - } |
| +scoped_refptr<TracedValue> Write(const std::set<HeapEntry>& entries) { |
| + std::string buffer; |
| + scoped_refptr<TracedValue> traced_value = new TracedValue; |
| - // Entries with the size per type. |
| - for (const auto& entry : sorted_bytes_by_type) { |
| - traced_value_->BeginDictionary(); |
| - // Insert a forward reference to the type name that will be written to the |
| - // trace when it is flushed. |
| - WriteTypeId(type_name_deduplicator_->Insert(entry.first)); |
| - WriteSize(entry.second); |
| - traced_value_->EndDictionary(); |
| - } |
| + traced_value->BeginArray("entries"); |
| - traced_value_->EndArray(); // "entries" |
| + for (const HeapEntry& entry : entries) { |
| + traced_value->BeginDictionary(); |
| - return traced_value_; |
| -} |
| + // Format size as hexadecimal string into |buffer|. |
| + SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size)); |
| + traced_value->SetString("size", buffer); |
| + |
| + if (entry.stack_frame_id == -1) { |
| + // An empty backtrace (which will have ID -1) is represented by the empty |
| + // string, because there is no leaf frame to reference in |stackFrames|. |
| + traced_value->SetString("bt", ""); |
| + } else { |
| + // Format index of the leaf frame as a string, because |stackFrames| is a |
| + // dictionary, not an array. |
| + SStringPrintf(&buffer, "%i", entry.stack_frame_id); |
| + traced_value->SetString("bt", buffer); |
| + } |
| -void HeapDumpWriter::WriteStackFrameIndex(int index) { |
| - if (index == -1) { |
| - // An empty backtrace (which will have index -1) is represented by the empty |
| - // string, because there is no leaf frame to reference in |stackFrames|. |
| - traced_value_->SetString("bt", ""); |
| - } else { |
| - // Format index of the leaf frame as a string, because |stackFrames| is a |
| - // dictionary, not an array. |
| - SStringPrintf(&buffer_, "%i", index); |
| - traced_value_->SetString("bt", buffer_); |
| + // Type ID -1 (cumulative size for all types) is represented by the absence |
| + // of the "type" key in the dictionary. |
| + if (entry.type_id != -1) { |
| + // Format the type ID as a string. |
| + SStringPrintf(&buffer, "%i", entry.type_id); |
| + traced_value->SetString("type", buffer); |
| + } |
| + |
| + traced_value->EndDictionary(); |
| } |
| -} |
| -void HeapDumpWriter::WriteTypeId(int type_id) { |
| - // Format the type ID as a string. |
| - SStringPrintf(&buffer_, "%i", type_id); |
| - traced_value_->SetString("type", buffer_); |
| + traced_value->EndArray(); // "entries" |
| + return traced_value; |
| } |
| -void HeapDumpWriter::WriteSize(size_t size) { |
| - // Format size as hexadecimal string into |buffer_|. |
| - SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size)); |
| - traced_value_->SetString("size", buffer_); |
| +} // namespace heap_dump |
| + |
| +scoped_refptr<TracedValue> WriteHeapDump( |
| + const base::hash_map<AllocationContext, size_t>& allocations, |
| + StackFrameDeduplicator* stack_frame_deduplicator, |
| + TypeNameDeduplicator* type_name_deduplicator) { |
| + return heap_dump::Write(heap_dump::Dump(allocations, stack_frame_deduplicator, |
| + type_name_deduplicator)); |
| } |
| } // namespace trace_event |