Chromium Code Reviews| Index: base/trace_event/heap_profiler_heap_dump_writer.cc |
| diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc |
| index b0ed86f32326d0ec66be40b9eca8c1f1c1f2766c..2a629d45efced6c89944ec4262602481d38f46e2 100644 |
| --- a/base/trace_event/heap_profiler_heap_dump_writer.cc |
| +++ b/base/trace_event/heap_profiler_heap_dump_writer.cc |
| @@ -6,44 +6,197 @@ |
| #include <algorithm> |
| #include <iterator> |
| -#include <utility> |
| +#include <tuple> |
| #include <vector> |
| +#include "base/containers/hash_tables.h" |
| #include "base/format_macros.h" |
| +#include "base/logging.h" |
| #include "base/strings/stringprintf.h" |
| +#include "base/trace_event/heap_profiler_allocation_context.h" |
| #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" |
| #include "base/trace_event/heap_profiler_type_name_deduplicator.h" |
| #include "base/trace_event/trace_event_argument.h" |
| +// Most of what the |HeapDumpWriter| does is aggregating the detailed |
|
petrcermak
2015/12/04 19:13:20
nit: I don't think there should be a definite arti
Ruud van Asseldonk
2015/12/07 13:53:59
As you wish.
|
| +// information about the heap and deciding what to dump. Input to this |
| +// process is a list of (AllocationContext, size). The context has several |
|
petrcermak
2015/12/04 19:13:20
supernit: s/several/two/ (I'd expect "several" to
petrcermak
2015/12/04 19:13:20
nit: I'd say "tuple" before the full stop.
Ruud van Asseldonk
2015/12/07 13:53:59
Yes only two is a bit of an anticlimax here after
|
| +// properties: |
| +// |
| +// * A backtrace (list of strings). |
| +// * A type name (string). |
| +// |
| +// For uniformity, type name is also treated as a list (with one element). A |
| +// list is a path down a tree. For backtrace, this is the call tree. The tree |
| +// for type name has height one. There is one tree for every property, but |
| +// nodes of the trees can be shared among the trees. For instance, starting |
| +// from the root node, following the "BrowserMain" edge breaking down by |
| +// backtrace, and then following the "T" edge breaking down by type name, will |
| +// yield the same node as following the "T" edge from the root, and then |
| +// following the "BrowserMain" edge breaking down by backtrace. The size of |
| +// this node is the number of bytes allocated for type "T" when "BrowserMain" |
| +// was at the bottom of the call stack. |
| +// |
| +// The (AllocationContext, size) pairs are converted into |Part|s and grouped |
| +// into |Bucket|s. Parts are mutually exclusive parts of the heap. A bucket |
| +// is a group of parts of which the properties share a prefix. |
|
petrcermak
2015/12/04 19:13:20
This is quite cryptic. Could you please provide an
petrcermak
2015/12/04 19:13:20
nit: s/of which the/whose/
Ruud van Asseldonk
2015/12/07 13:53:59
I tried an example, but even a very minimal exampl
|
| + |
| namespace base { |
| namespace trace_event { |
| +using Entry = HeapDumpWriter::Entry; |
| + |
| namespace { |
| -template <typename T> |
| -bool PairSizeGt(const std::pair<T, size_t>& lhs, |
| - const std::pair<T, size_t>& rhs) { |
| - return lhs.second > rhs.second; |
| +// References an array of |const char*|s in two ranges: [begin, cursor), and |
| +// [cursor, end). |
| +struct Slice { |
| + // Initializes the slice with the cursor at the first element. |
| + Slice(const char* const* begin, const char* const* end) |
| + : begin(begin), cursor(begin), end(end) {} |
| + |
| + Slice(const char* const* begin, |
| + const char* const* cursor, |
| + const char* const* end) |
| + : begin(begin), cursor(cursor), end(end) {} |
| + |
| + // Returns a slice with cursor advanced by one. |
| + Slice next() const { return Slice(begin, cursor + 1, end); } |
| + |
| + // Pointer into const array (array cannot be modified through this struct) of |
| + // |const char*| (immutable string). |begin| points at the first element, |
| + // |end| points past the last element. |
| + const char* const* begin; |
| + const char* const* cursor; |
| + const char* const* end; |
| +}; |
| + |
| +struct Part; |
| + |
| +// Pointer to member of |Part| that has type |Slice|. |
|
petrcermak
2015/12/04 19:13:20
nit: "a member"
Ruud van Asseldonk
2015/12/07 13:53:59
Done.
|
| +using Property = Slice Part::*; |
| + |
| +// Helper for breaking down the heap. The slices reference an allocation |
|
petrcermak
2015/12/04 19:13:20
Do multiple slices correspond to one allocation co
Ruud van Asseldonk
2015/12/07 13:53:59
Done.
|
| +// context via the |begin| and |end| pointers. |size| is the number of bytes |
| +// allocated for this context. The |cursor| of the slices indicates the current |
|
petrcermak
2015/12/04 19:13:20
If the previous comment applies, go to page 200 ..
Ruud van Asseldonk
2015/12/07 13:53:59
Done.
|
| +// position in the tree when breaking down. The branches taken are [begin, |
|
petrcermak
2015/12/04 19:13:20
"branches taken" is difficult to understand. What
Ruud van Asseldonk
2015/12/07 13:53:59
Done.
|
| +// cursor). If |cursor| is not equal to |end|, it means that further breakdown |
| +// by that property is possible. |
| +struct Part { |
| + Part(Slice backtrace, Slice type_name, size_t size) |
| + : backtrace(backtrace), type_name(type_name), size(size) {} |
| + |
| + // Constructs a part referencing an |AllocationContext|. Cursors are set to |
| + // the beginning. |
| + Part(const AllocationContext& ctx, size_t size) |
| + : backtrace(std::begin(ctx.backtrace.frames), |
| + std::end(ctx.backtrace.frames)), |
| + type_name(&ctx.type_name, &ctx.type_name + 1), |
| + size(size) { |
| + // The backtrace of the allocation context is padded with null pointers, |
| + // but for breaking down the heap this padding should not be taken into |
| + // account, so adjust the end pointer to point past the last non-null frame. |
| + while (backtrace.begin != backtrace.end && *(backtrace.end - 1) == nullptr) |
| + backtrace.end--; |
| + } |
| + |
| + // Returns a copy of this part, with the cursor of the member pointed at |
| + // by |property| advanced by one. |
| + Part advance(Property property) const { |
| + return Part(property == &Part::backtrace ? backtrace.next() : backtrace, |
|
petrcermak
2015/12/04 19:13:20
Thought: Is it worth keeping copying backtrace/typ
Ruud van Asseldonk
2015/12/07 13:53:59
I did not measure, but I think it is. Instead of u
|
| + property == &Part::type_name ? type_name.next() : type_name, |
| + size); |
| + } |
| + |
| + Slice backtrace; |
| + Slice type_name; |
| + size_t size; |
| +}; |
| + |
| +// Group of parts where the [begin, cursor) range is equal for all properties |
| +// of all parts in the bucket. |
| +struct Bucket { |
| + Bucket() : size(0) {} |
| + |
| + std::vector<Part> parts; |
| + |
| + // The sum of the sizes of the parts. |
| + size_t size; |
| +}; |
| + |
| +bool operator<(const Bucket& lhs, const Bucket& rhs) { |
| + return lhs.size < rhs.size; |
| +} |
| + |
| +// Groups the list of heap parts by the value under the cursor of the member |
| +// pointed at by |groupBy|. The parts in the returned list will have the cursor |
| +// advanced for the member pointed at by |groupBy|. |
| +std::vector<Bucket> GroupBy(const std::vector<Part>& parts, Property groupBy) { |
| + base::hash_map<const char*, Bucket> grouped; |
| + |
| + for (const Part& part : parts) { |
| + Slice property = part.*groupBy; |
| + if (property.cursor != property.end) { |
| + Bucket& bucket = grouped[*(property.cursor)]; |
| + bucket.size += part.size; |
| + bucket.parts.push_back(part.advance(groupBy)); |
| + DCHECK_GT(bucket.size, 0u); |
| + } |
| + } |
| + |
| + std::vector<Bucket> buckets; |
| + buckets.reserve(grouped.size()); |
| + for (auto key_bucket : grouped) |
| + buckets.push_back(key_bucket.second); |
| + |
| + return buckets; |
|
petrcermak
2015/12/04 19:13:20
Are move semantics already allowed in Chrome? If t
Ruud van Asseldonk
2015/12/07 13:53:59
Even before C++11, compilers would do Return Value
|
| } |
| -// Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is |
| -// ordered from high |size_t| to low |size_t|. |
| -template <typename T> |
| -std::vector<std::pair<T, size_t>> SortBySizeDescending( |
| - const hash_map<T, size_t>& grouped) { |
| - std::vector<std::pair<T, size_t>> sorted; |
| - sorted.reserve(grouped.size()); |
| - std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted)); |
| - std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>); |
| - return sorted; |
| +// Breaks down the bucket by the member pointed at by |property|. Returns only |
| +// buckets that contribute significantly to the total size. The long tail is |
| +// omitted. |
| +std::vector<Bucket> BreakDownBy(const Bucket& bucket, Property property) { |
| + std::vector<Bucket> buckets = GroupBy(bucket.parts, property); |
| + |
| + // Ensure that |buckets| is a max-heap (the data structure, not memory heap), |
| + // so its front contains the largest bucket. Buckets should be iterated |
| + // ordered by size, but sorting the vector is overkill because the long tail |
| + // of small buckets will be discarded. By using a max-heap, the optimal case |
| + // where all but the first bucket are discarded is O(n). The worst case where |
| + // no bucket is discarded is doing a heap sort, which is O(n log n). |
| + std::make_heap(buckets.begin(), buckets.end()); |
| + |
| + // Keep including parts until adding a part would increase the number of |
| + // bytes accounted for by less than a percent. The large buckets end up in |
| + // [it, end()), [begin(), it) is the part that contains the max-heap of small |
| + // buckets. TODO(ruuda): tweak the heuristic. |
| + size_t accounted_for = 0; |
| + std::vector<Bucket>::iterator it; |
| + for (it = buckets.end(); it != buckets.begin(); --it) { |
| + // Compute contribution to number of bytes accounted for in percent, rounded |
| + // down due to integer division. Buckets are iterated by descending size, |
| + // so later buckets cannot have a larger contribution than this one. |
| + accounted_for += buckets.front().size; |
| + size_t contribution = buckets.front().size * 100 / accounted_for; |
| + if (contribution == 0) |
| + break; |
| + |
| + // Put the largest bucket in [begin, it) at |it - 1| and max-heapify |
| + // [begin, it - 1). This puts the next largest bucket at |buckets.front()|. |
| + std::pop_heap(buckets.begin(), it); |
| + } |
| + |
| + // Discard the long tail of buckets that contribute less than a percent. |
| + buckets.erase(buckets.begin(), it); |
| + |
| + return buckets; |
|
petrcermak
2015/12/04 19:13:20
ditto
Ruud van Asseldonk
2015/12/07 13:53:59
See above.
|
| } |
| } // namespace |
| HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, |
| TypeNameDeduplicator* type_name_deduplicator) |
| - : traced_value_(new TracedValue()), |
| - stack_frame_deduplicator_(stack_frame_deduplicator), |
| + : stack_frame_deduplicator_(stack_frame_deduplicator), |
| type_name_deduplicator_(type_name_deduplicator) {} |
| HeapDumpWriter::~HeapDumpWriter() {} |
| @@ -53,81 +206,113 @@ void HeapDumpWriter::InsertAllocation(const AllocationContext& context, |
| bytes_by_context_[context] += size; |
| } |
| -scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() { |
| - // Group by backtrace and by type ID, and compute the total heap size while |
| - // iterating anyway. |
| - size_t total_size = 0; |
| - hash_map<Backtrace, size_t> bytes_by_backtrace; |
| - hash_map<const char*, size_t> bytes_by_type; |
| - |
| - for (auto context_size : bytes_by_context_) { |
| - total_size += context_size.second; |
| - bytes_by_backtrace[context_size.first.backtrace] += context_size.second; |
| - bytes_by_type[context_size.first.type_name] += context_size.second; |
| - } |
| +bool HeapDumpWriter::InsertEntry(const Bucket& bucket) { |
| + // The parts in the bucket are all different, but for all properties, the |
| + // [begin, cursor) range is equal for all parts in the bucket. |
| + DCHECK(!bucket.parts.empty()); |
| + Slice backtrace = bucket.parts.front().backtrace; |
| + Slice type_name = bucket.parts.front().type_name; |
| - auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace); |
| - auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type); |
| + Entry entry; |
| + entry.stack_frame_id = |
| + stack_frame_deduplicator_->Insert(backtrace.begin, backtrace.cursor); |
| - traced_value_->BeginArray("entries"); |
| + // If the type name is not set use ID -1, otherwise deduplicate the type name. |
|
petrcermak
2015/12/04 19:13:20
supernit: I'd say "ID = -1" (multiple uses in this
Ruud van Asseldonk
2015/12/07 13:53:59
Ignored.
|
| + entry.type_id = (type_name.begin == type_name.cursor) |
| + ? -1 |
| + : type_name_deduplicator_->Insert(*type_name.begin); |
| - // The global size, no column specified. |
| - { |
| - traced_value_->BeginDictionary(); |
| - WriteSize(total_size); |
| - traced_value_->EndDictionary(); |
| - } |
| + entry.size = bucket.size; |
| - // Entries with the size per backtrace. |
| - for (const auto& entry : sorted_bytes_by_backtrace) { |
| - traced_value_->BeginDictionary(); |
| - // Insert a forward reference to the backtrace that will be written to the |
| - // |stackFrames| dictionary later on. |
| - int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames), |
| - std::end(entry.first.frames)); |
| - WriteStackFrameIndex(idx); |
| - WriteSize(entry.second); |
| - traced_value_->EndDictionary(); |
| - } |
| + auto position_and_inserted = entries_.insert(entry); |
| + return position_and_inserted.second; |
| +} |
| - // Entries with the size per type. |
| - for (const auto& entry : sorted_bytes_by_type) { |
| - traced_value_->BeginDictionary(); |
| - // Insert a forward reference to the type name that will be written to the |
| - // trace when it is flushed. |
| - WriteTypeId(type_name_deduplicator_->Insert(entry.first)); |
| - WriteSize(entry.second); |
| - traced_value_->EndDictionary(); |
| - } |
| +void HeapDumpWriter::BreakDown(const Bucket& bucket) { |
| + std::vector<Bucket> by_backtrace = BreakDownBy(bucket, &Part::backtrace); |
| + std::vector<Bucket> by_type_name = BreakDownBy(bucket, &Part::type_name); |
| - traced_value_->EndArray(); // "entries" |
| + // Insert entries for the buckets. If a bucket was not present before, it has |
| + // not been broken down before, so recursively continue breaking down in that |
| + // case. There might be multiple routes to the same entry (first break down |
| + // by type name, then by backtrace, or first by backtrace and then by type), |
| + // so a set is used to avoid dumping and breaking down entries more than once. |
| - return traced_value_; |
| + for (const Bucket& subbucket : by_backtrace) |
| + if (InsertEntry(subbucket)) |
| + BreakDown(subbucket); |
| + |
| + for (const Bucket& subbucket : by_type_name) |
| + if (InsertEntry(subbucket)) |
| + BreakDown(subbucket); |
| } |
| -void HeapDumpWriter::WriteStackFrameIndex(int index) { |
| - if (index == -1) { |
| - // An empty backtrace (which will have index -1) is represented by the empty |
| - // string, because there is no leaf frame to reference in |stackFrames|. |
| - traced_value_->SetString("bt", ""); |
| - } else { |
| - // Format index of the leaf frame as a string, because |stackFrames| is a |
| - // dictionary, not an array. |
| - SStringPrintf(&buffer_, "%i", index); |
| - traced_value_->SetString("bt", buffer_); |
| +const std::set<Entry>& HeapDumpWriter::Dump() { |
| + // Start with one bucket that represents the entire heap. Iterate by |
| + // reference, because the heap parts are going to point into allocation |
| + // contexts stored in |bytes_by_context_|. |
| + Bucket root_bucket; |
| + for (const auto& context_and_size : bytes_by_context_) { |
| + const AllocationContext& context = context_and_size.first; |
| + const size_t size = context_and_size.second; |
| + root_bucket.parts.push_back(Part(context, size)); |
| + root_bucket.size += size; |
| } |
| + |
| + InsertEntry(root_bucket); |
| + |
| + // Recursively break down the heap and fill |entries_| with entries to dump. |
| + BreakDown(root_bucket); |
| + |
| + return entries_; |
| } |
| -void HeapDumpWriter::WriteTypeId(int type_id) { |
| - // Format the type ID as a string. |
| - SStringPrintf(&buffer_, "%i", type_id); |
| - traced_value_->SetString("type", buffer_); |
| +// static |
| +scoped_refptr<TracedValue> HeapDumpWriter::Write( |
| + const std::set<Entry>& entries) { |
| + std::string buffer; |
| + scoped_refptr<TracedValue> traced_value = new TracedValue; |
| + |
| + traced_value->BeginArray("entries"); |
| + |
| + for (const Entry& entry : entries) { |
| + traced_value->BeginDictionary(); |
| + |
| + // Format size as hexadecimal string into |buffer|. |
| + SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size)); |
| + traced_value->SetString("size", buffer); |
| + |
| + if (entry.stack_frame_id == -1) { |
| + // An empty backtrace (which will have ID -1) is represented by the empty |
| + // string, because there is no leaf frame to reference in |stackFrames|. |
| + traced_value->SetString("bt", ""); |
| + } else { |
| + // Format index of the leaf frame as a string, because |stackFrames| is a |
| + // dictionary, not an array. |
| + SStringPrintf(&buffer, "%i", entry.stack_frame_id); |
| + traced_value->SetString("bt", buffer); |
| + } |
| + |
| + // Type ID -1 (cumulative size for all types) is represented by the absence |
| + // of the "type" key in the dictionary. |
| + if (entry.type_id != -1) { |
| + // Format the type ID as a string. |
| + SStringPrintf(&buffer, "%i", entry.type_id); |
| + traced_value->SetString("type", buffer); |
| + } |
| + |
| + traced_value->EndDictionary(); |
| + } |
| + |
| + traced_value->EndArray(); // "entries" |
| + return traced_value; |
| } |
| -void HeapDumpWriter::WriteSize(size_t size) { |
| - // Format size as hexadecimal string into |buffer_|. |
| - SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size)); |
| - traced_value_->SetString("size", buffer_); |
| +bool operator<(Entry lhs, Entry rhs) { |
| + // There is no need to compare |size|. If the backtrace and type name are |
| + // equal then the sizes must be equal as well. |
| + return std::tie(lhs.stack_frame_id, lhs.type_id) < |
| + std::tie(rhs.stack_frame_id, rhs.type_id); |
| } |
| } // namespace trace_event |