Index: base/trace_event/heap_profiler_heap_dump_writer.cc |
diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc |
index b0ed86f32326d0ec66be40b9eca8c1f1c1f2766c..da0fdfb0a616a16eda73d2352ea1e99b9ac71a25 100644 |
--- a/base/trace_event/heap_profiler_heap_dump_writer.cc |
+++ b/base/trace_event/heap_profiler_heap_dump_writer.cc |
@@ -6,128 +6,304 @@ |
#include <algorithm> |
#include <iterator> |
-#include <utility> |
+#include <tuple> |
#include <vector> |
+#include "base/containers/hash_tables.h" |
#include "base/format_macros.h" |
+#include "base/logging.h" |
+#include "base/macros.h" |
#include "base/strings/stringprintf.h" |
+#include "base/trace_event/heap_profiler_allocation_context.h" |
#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" |
#include "base/trace_event/heap_profiler_type_name_deduplicator.h" |
#include "base/trace_event/trace_event_argument.h" |
+// Most of what this file does is aggregating detailed information about the |
+// heap and deciding what to dump. The input to this process is a list of |
petrcermak
2015/12/09 15:02:31
nit: I think there should be "The" in front of "Input".
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
|
+// |AllocationContext|s and size pairs. |
+// |
+// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size) |
+// pairs where the properties of the contexts share a prefix. (Type name is |
petrcermak
2015/12/09 15:02:31
nit: It sounds like you're talking about "propert
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
|
+// considered a list of length one here.) First all pairs are put into one |
+// bucket that represents the entire heap. Then this bucket is recursively |
+// broken down into smaller buckets. Each bucket keeps track of whether further |
+// breakdown is possible. |
+ |
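A purely illustrative example of the breakdown described above (the frame and type names are invented, not taken from this CL): suppose the input consists of three pairs,

    (backtrace [main, Run, Alloc], type "int",  64 bytes)
    (backtrace [main, Run, Alloc], type "Node", 32 bytes)
    (backtrace [main, Draw],       type "Node", 16 bytes)

All three start in a single root bucket of 112 bytes. Breaking that bucket down by backtrace groups everything under the shared frame "main" and advances the cursor; breaking the resulting bucket down by backtrace again splits it into a "Run" bucket of 96 bytes and a "Draw" bucket of 16 bytes. Breaking the root bucket down by type name instead yields an "int" bucket of 64 bytes and a "Node" bucket of 48 bytes.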
namespace base { |
namespace trace_event { |
+namespace heap_dump { |
namespace { |
-template <typename T> |
-bool PairSizeGt(const std::pair<T, size_t>& lhs, |
- const std::pair<T, size_t>& rhs) { |
- return lhs.second > rhs.second; |
+// Denotes a property of |AllocationContext|. |
+enum class BreakDownMode { kByBacktrace, kByTypeName }; |
+ |
+struct Bucket { |
+ Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {} |
+ |
+ // List of (context, number of bytes allocated for that context) pairs. |
+ std::vector<std::pair<const AllocationContext*, size_t>> allocations; |
+ |
+ // The sum of the sizes of the allocations. |
+ size_t size; |
+ |
+ // The index of the stack frame that has not yet been broken down by. For |
+ // all contexts in this bucket, the stack frames 0 up to (but not including) |
+ // the cursor must be equal. |
+ size_t backtrace_cursor; |
+ |
+ // When true, the type name for all contexts in this bucket must be equal. |
+ bool is_broken_down_by_type_name; |
+}; |
+ |
+// Comparison operator used to order buckets by size so that buckets of |
+// insignificant size might be discarded. |
+bool operator<(const Bucket& lhs, const Bucket& rhs) { |
+ return lhs.size < rhs.size; |
+} |
+ |
+// Groups the allocations in the bucket by |breakDownBy|. The buckets in the |
+// returned list will have |backtrace_cursor| advanced or |
+// |is_broken_down_by_type_name| set depending on the property to group by. |
petrcermak
2015/12/09 15:02:32
nit: "depending ON"
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
|
+std::vector<Bucket> BreakDownBy(const Bucket& bucket, |
+ BreakDownMode breakDownBy) { |
+ base::hash_map<const char*, Bucket> breakdown; |
+ |
+ if (breakDownBy == BreakDownMode::kByBacktrace) { |
petrcermak
2015/12/09 15:02:32
Wouldn't a switch make more sense here?
Ruud van Asseldonk
2015/12/09 16:16:01
I don’t have a strong opinion. I find a switch to
|
+ for (const auto& context_and_size : bucket.allocations) { |
+ const AllocationContext* context = context_and_size.first; |
petrcermak
2015/12/09 15:02:31
You always use |context| to get the backtrace so y
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
|
+ const char* const* begin = std::begin(context->backtrace.frames); |
+ const char* const* end = std::end(context->backtrace.frames); |
+ const char* const* cursor = begin + bucket.backtrace_cursor; |
+ |
+ // The backtrace in the context is padded with null pointers, but these |
+ // should not be considered for breakdown. Adjust end to point past the |
+ // last non-null frame. |
+ while (begin != end && *(end - 1) == nullptr) |
+ end--; |
+ |
+ DCHECK_LE(cursor, end); |
+ |
+ if (cursor != end) { |
+ Bucket& subbucket = breakdown[*cursor]; |
+ subbucket.size += context_and_size.second; |
+ subbucket.allocations.push_back(context_and_size); |
+ subbucket.backtrace_cursor = bucket.backtrace_cursor + 1; |
+ subbucket.is_broken_down_by_type_name = |
+ bucket.is_broken_down_by_type_name; |
+ DCHECK_GT(subbucket.size, 0u); |
+ } |
+ } |
+ } else if (breakDownBy == BreakDownMode::kByTypeName) { |
+ if (!bucket.is_broken_down_by_type_name) { |
+ for (const auto& context_and_size : bucket.allocations) { |
+ Bucket& subbucket = breakdown[context_and_size.first->type_name]; |
+ subbucket.size += context_and_size.second; |
+ subbucket.allocations.push_back(context_and_size); |
+ subbucket.backtrace_cursor = bucket.backtrace_cursor; |
+ subbucket.is_broken_down_by_type_name = true; |
+ DCHECK_GT(subbucket.size, 0u); |
+ } |
+ } |
+ } |
+ |
+ std::vector<Bucket> buckets; |
+ buckets.reserve(breakdown.size()); |
+ for (auto key_bucket : breakdown) |
+ buckets.push_back(key_bucket.second); |
+ |
+ return buckets; |
+} |
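On the switch question raised above: a sketch of what the dispatch could look like with a switch, assuming the same |Bucket|, |BreakDownMode| and |AllocationContext| types as in this CL (illustrative only, with the loop bodies elided because they are identical to the if/else version above):

    std::vector<Bucket> BreakDownBy(const Bucket& bucket,
                                    BreakDownMode breakDownBy) {
      base::hash_map<const char*, Bucket> breakdown;

      switch (breakDownBy) {
        case BreakDownMode::kByBacktrace:
          for (const auto& context_and_size : bucket.allocations) {
            // Group |context_and_size| by the frame at |backtrace_cursor|,
            // exactly as in the kByBacktrace branch above.
          }
          break;
        case BreakDownMode::kByTypeName:
          if (!bucket.is_broken_down_by_type_name) {
            // Group by |type_name|, exactly as in the kByTypeName branch above.
          }
          break;
      }

      std::vector<Bucket> buckets;
      buckets.reserve(breakdown.size());
      for (const auto& key_bucket : breakdown)
        buckets.push_back(key_bucket.second);
      return buckets;
    }

With only two enum values the choice is mostly one of readability.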
+ |
+// Returns only the buckets that contribute significantly to the total size; |
+// the long tail of small buckets is discarded. |
+std::vector<Bucket> DiscardSmallBuckets(std::vector<Bucket> buckets) { |
+ // Ensure that |buckets| is a max-heap (the data structure, not memory heap), |
+ // so its front contains the largest bucket. Buckets should be iterated |
+ // ordered by size, but sorting the vector is overkill because the long tail |
+ // of small buckets will be discarded. By using a max-heap, the optimal case |
+ // where all but the first bucket are discarded is O(n). The worst case where |
+ // no bucket is discarded is doing a heap sort, which is O(n log n). |
+ std::make_heap(buckets.begin(), buckets.end()); |
+ |
+ // Keep including buckets until adding one would increase the number of |
+ // bytes accounted for by less than a percent. The large buckets end up in |
+ // [it, end()); [begin(), it) is the part that contains the max-heap of small |
+ // buckets. TODO(ruuda): tweak the heuristic. |
+ size_t accounted_for = 0; |
+ std::vector<Bucket>::iterator it; |
+ for (it = buckets.end(); it != buckets.begin(); --it) { |
+ // Compute contribution to number of bytes accounted for in percent, rounded |
+ // down due to integer division. Buckets are iterated by descending size, |
+ // so later buckets cannot have a larger contribution than this one. |
+ accounted_for += buckets.front().size; |
+ size_t contribution = buckets.front().size * 100 / accounted_for; |
+ if (contribution == 0) |
+ break; |
+ |
+ // Put the largest bucket in [begin, it) at |it - 1| and max-heapify |
+ // [begin, it - 1). This puts the next largest bucket at |buckets.front()|. |
+ std::pop_heap(buckets.begin(), it); |
+ } |
+ |
+ // At this point, |buckets| looks like this (numbers are bucket sizes): |
+ // |
+ //  <-- max-heap of small buckets ---> |
+ //                             <-- large buckets by descending size --> |
+ // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ] |
+ //   ^                                ^            ^ |
+ //   |                                |            | |
+ //   begin()                          it           end() |
+ |
+ // Discard the long tail of buckets that contribute less than a percent. |
+ buckets.erase(buckets.begin(), it); |
+ |
+ return buckets; |
+} |
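The partial heap sort above is subtle enough to be worth seeing in isolation. Below is a minimal, self-contained sketch of the same selection loop applied to plain numbers; nothing in it is part of the CL, it only mirrors the logic of DiscardSmallBuckets.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<size_t> sizes = {4000, 1000, 500, 3, 2, 1};

      // Arrange |sizes| as a max-heap so that the largest element is at the
      // front, without fully sorting the vector.
      std::make_heap(sizes.begin(), sizes.end());

      size_t accounted_for = 0;
      std::vector<size_t>::iterator it;
      for (it = sizes.end(); it != sizes.begin(); --it) {
        accounted_for += sizes.front();
        size_t contribution = sizes.front() * 100 / accounted_for;
        if (contribution == 0)
          break;
        // Move the current largest element to |it - 1| and restore the heap
        // property on [begin, it - 1).
        std::pop_heap(sizes.begin(), it);
      }

      // [it, end()) now holds the significant sizes; drop the small tail.
      sizes.erase(sizes.begin(), it);

      for (size_t size : sizes)
        std::cout << size << "\n";  // Prints 500, 1000 and 4000.

      return 0;
    }

With this input only the three smallest sizes fall below the one-percent threshold and are dropped; as in the CL, each pop_heap places the current maximum at the end of the shrinking heap range, so the kept sizes end up with the largest at the back of the vector.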
+ |
+// Inserts an entry for |bucket| into |entries|; returns true if it was new. |
+bool InsertEntry(const Bucket& bucket, |
+ std::set<HeapEntry>* entries, |
+ StackFrameDeduplicator* stack_frame_deduplicator, |
+ TypeNameDeduplicator* type_name_deduplicator) { |
+ DCHECK(!bucket.allocations.empty()); |
+ |
+ // The contexts in the bucket are all different, but they share the [begin, |
+ // cursor) backtrace prefix, and the type name if broken down by type name. |
petrcermak
2015/12/09 15:02:32
This comment is outdated - it doesn't capture the
Ruud van Asseldonk
2015/12/09 16:16:01
Fixed.
|
+ const AllocationContext& context = *bucket.allocations.front().first; |
+ |
+ const char* const* backtrace_begin = std::begin(context.backtrace.frames); |
+ const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor; |
+ DCHECK_LE(bucket.backtrace_cursor, arraysize(context.backtrace.frames)); |
+ |
+ HeapEntry entry; |
+ entry.stack_frame_id = |
+ stack_frame_deduplicator->Insert(backtrace_begin, backtrace_end); |
+ |
+ // Deduplicate the type name if it was broken down by; otherwise use ID -1. |
+ entry.type_id = bucket.is_broken_down_by_type_name |
+ ? type_name_deduplicator->Insert(context.type_name) |
+ : -1; |
+ |
+ entry.size = bucket.size; |
+ |
+ auto position_and_inserted = entries->insert(entry); |
+ return position_and_inserted.second; |
} |
-// Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is |
-// ordered from high |size_t| to low |size_t|. |
-template <typename T> |
-std::vector<std::pair<T, size_t>> SortBySizeDescending( |
- const hash_map<T, size_t>& grouped) { |
- std::vector<std::pair<T, size_t>> sorted; |
- sorted.reserve(grouped.size()); |
- std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted)); |
- std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>); |
- return sorted; |
+// Recursively breaks down |bucket| into smaller buckets, and adds the buckets |
+// of significant size to |entries|. |
+void BreakDown(const Bucket& bucket, |
+ std::set<HeapEntry>* entries, |
+ StackFrameDeduplicator* stack_frame_deduplicator, |
+ TypeNameDeduplicator* type_name_deduplicator) { |
+ std::vector<Bucket> by_backtrace = |
+ DiscardSmallBuckets(BreakDownBy(bucket, BreakDownMode::kByBacktrace)); |
+ std::vector<Bucket> by_type_name = |
+ DiscardSmallBuckets(BreakDownBy(bucket, BreakDownMode::kByTypeName)); |
+ |
+ // Insert entries for the buckets. If a bucket was not present before, it has |
+ // not been broken down before, so recursively continue breaking down in that |
+ // case. There might be multiple routes to the same entry (first break down |
+ // by type name, then by backtrace, or first by backtrace and then by type), |
+ // so a set is used to avoid dumping and breaking down entries more than once. |
+ |
+ for (const Bucket& subbucket : by_backtrace) |
+ if (InsertEntry(subbucket, entries, stack_frame_deduplicator, |
+ type_name_deduplicator)) |
+ BreakDown(subbucket, entries, stack_frame_deduplicator, |
+ type_name_deduplicator); |
+ |
+ for (const Bucket& subbucket : by_type_name) |
+ if (InsertEntry(subbucket, entries, stack_frame_deduplicator, |
+ type_name_deduplicator)) |
+ BreakDown(subbucket, entries, stack_frame_deduplicator, |
+ type_name_deduplicator); |
} |
} // namespace |
-HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, |
- TypeNameDeduplicator* type_name_deduplicator) |
- : traced_value_(new TracedValue()), |
- stack_frame_deduplicator_(stack_frame_deduplicator), |
- type_name_deduplicator_(type_name_deduplicator) {} |
- |
-HeapDumpWriter::~HeapDumpWriter() {} |
- |
-void HeapDumpWriter::InsertAllocation(const AllocationContext& context, |
- size_t size) { |
- bytes_by_context_[context] += size; |
+bool operator<(HeapEntry lhs, HeapEntry rhs) { |
+ // There is no need to compare |size|. If the backtrace and type name are |
+ // equal then the sizes must be equal as well. |
+ return std::tie(lhs.stack_frame_id, lhs.type_id) < |
+ std::tie(rhs.stack_frame_id, rhs.type_id); |
} |
-scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() { |
- // Group by backtrace and by type ID, and compute the total heap size while |
- // iterating anyway. |
- size_t total_size = 0; |
- hash_map<Backtrace, size_t> bytes_by_backtrace; |
- hash_map<const char*, size_t> bytes_by_type; |
- |
- for (auto context_size : bytes_by_context_) { |
- total_size += context_size.second; |
- bytes_by_backtrace[context_size.first.backtrace] += context_size.second; |
- bytes_by_type[context_size.first.type_name] += context_size.second; |
+std::set<HeapEntry> Dump( |
+ const base::hash_map<AllocationContext, size_t>& allocations, |
+ StackFrameDeduplicator* stack_frame_deduplicator, |
+ TypeNameDeduplicator* type_name_deduplicator) { |
+ // Start with one bucket that represents the entire heap. Iterate by |
+ // reference, because the pairs are going to point to allocation contexts |
+ // stored in |allocations|. |
+ Bucket root_bucket; |
+ for (const auto& context_and_size : allocations) { |
+ const AllocationContext* context = &context_and_size.first; |
+ const size_t size = context_and_size.second; |
+ root_bucket.allocations.push_back(std::make_pair(context, size)); |
+ root_bucket.size += size; |
} |
- auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace); |
- auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type); |
+ std::set<HeapEntry> entries; |
+ InsertEntry(root_bucket, &entries, stack_frame_deduplicator, |
+ type_name_deduplicator); |
- traced_value_->BeginArray("entries"); |
+ // Recursively break down the heap and fill |entries| with entries to dump. |
+ BreakDown(root_bucket, &entries, stack_frame_deduplicator, |
+ type_name_deduplicator); |
- // The global size, no column specified. |
- { |
- traced_value_->BeginDictionary(); |
- WriteSize(total_size); |
- traced_value_->EndDictionary(); |
- } |
+ return entries; |
+} |
- // Entries with the size per backtrace. |
- for (const auto& entry : sorted_bytes_by_backtrace) { |
- traced_value_->BeginDictionary(); |
- // Insert a forward reference to the backtrace that will be written to the |
- // |stackFrames| dictionary later on. |
- int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames), |
- std::end(entry.first.frames)); |
- WriteStackFrameIndex(idx); |
- WriteSize(entry.second); |
- traced_value_->EndDictionary(); |
- } |
+scoped_refptr<TracedValue> Write(const std::set<HeapEntry>& entries) { |
+ std::string buffer; |
+ scoped_refptr<TracedValue> traced_value = new TracedValue; |
- // Entries with the size per type. |
- for (const auto& entry : sorted_bytes_by_type) { |
- traced_value_->BeginDictionary(); |
- // Insert a forward reference to the type name that will be written to the |
- // trace when it is flushed. |
- WriteTypeId(type_name_deduplicator_->Insert(entry.first)); |
- WriteSize(entry.second); |
- traced_value_->EndDictionary(); |
- } |
+ traced_value->BeginArray("entries"); |
- traced_value_->EndArray(); // "entries" |
+ for (const HeapEntry& entry : entries) { |
+ traced_value->BeginDictionary(); |
- return traced_value_; |
-} |
+ // Format size as hexadecimal string into |buffer|. |
+ SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size)); |
+ traced_value->SetString("size", buffer); |
+ |
+ if (entry.stack_frame_id == -1) { |
+ // An empty backtrace (which will have ID -1) is represented by the empty |
+ // string, because there is no leaf frame to reference in |stackFrames|. |
+ traced_value->SetString("bt", ""); |
+ } else { |
+ // Format index of the leaf frame as a string, because |stackFrames| is a |
+ // dictionary, not an array. |
+ SStringPrintf(&buffer, "%i", entry.stack_frame_id); |
+ traced_value->SetString("bt", buffer); |
+ } |
-void HeapDumpWriter::WriteStackFrameIndex(int index) { |
- if (index == -1) { |
- // An empty backtrace (which will have index -1) is represented by the empty |
- // string, because there is no leaf frame to reference in |stackFrames|. |
- traced_value_->SetString("bt", ""); |
- } else { |
- // Format index of the leaf frame as a string, because |stackFrames| is a |
- // dictionary, not an array. |
- SStringPrintf(&buffer_, "%i", index); |
- traced_value_->SetString("bt", buffer_); |
+ // Type ID -1 (cumulative size for all types) is represented by the absence |
+ // of the "type" key in the dictionary. |
+ if (entry.type_id != -1) { |
+ // Format the type ID as a string. |
+ SStringPrintf(&buffer, "%i", entry.type_id); |
+ traced_value->SetString("type", buffer); |
+ } |
+ |
+ traced_value->EndDictionary(); |
} |
-} |
-void HeapDumpWriter::WriteTypeId(int type_id) { |
- // Format the type ID as a string. |
- SStringPrintf(&buffer_, "%i", type_id); |
- traced_value_->SetString("type", buffer_); |
+ traced_value->EndArray(); // "entries" |
+ return traced_value; |
} |
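For reference, with the formatting above the "entries" array serializes along these lines (the values are invented for illustration; sizes are hexadecimal, and "bt" and "type" are indices into the stack frame and type name dictionaries written elsewhere in the trace):

    "entries": [
      { "size": "8c00", "bt": "" },
      { "size": "5a00", "bt": "3" },
      { "size": "1e00", "bt": "7", "type": "2" }
    ]

The first entry is the bucket for the entire heap: its backtrace is empty and it carries no "type" key because it is not broken down by type name.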
-void HeapDumpWriter::WriteSize(size_t size) { |
- // Format size as hexadecimal string into |buffer_|. |
- SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size)); |
- traced_value_->SetString("size", buffer_); |
+} // namespace heap_dump |
+ |
+scoped_refptr<TracedValue> WriteHeapDump( |
+ const base::hash_map<AllocationContext, size_t>& allocations, |
+ StackFrameDeduplicator* stack_frame_deduplicator, |
+ TypeNameDeduplicator* type_name_deduplicator) { |
+ return heap_dump::Write(heap_dump::Dump(allocations, stack_frame_deduplicator, |
+ type_name_deduplicator)); |
} |
} // namespace trace_event |
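Finally, a hypothetical call site for the new entry point; |allocations| stands for the per-context byte counts collected by the dump provider, and the two deduplicator pointers are the per-trace instances owned elsewhere (all three names are assumptions made for illustration):

    scoped_refptr<TracedValue> heap_dump = WriteHeapDump(
        allocations, stack_frame_deduplicator, type_name_deduplicator);
    // |heap_dump| is then attached to the process memory dump; that wiring is
    // outside the scope of this file.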