Chromium Code Reviews
| Index: base/trace_event/heap_profiler_heap_dump_writer.cc |
| diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc |
| index 1bf06dbd9763e706d2e7da4ae5933d16baf994ee..a35e01c4b08e6caf5017080e9063d71615f6ba8e 100644 |
| --- a/base/trace_event/heap_profiler_heap_dump_writer.cc |
| +++ b/base/trace_event/heap_profiler_heap_dump_writer.cc |
| @@ -6,318 +6,88 @@ |
| #include <stdint.h> |
| -#include <algorithm> |
| -#include <iterator> |
| #include <tuple> |
| -#include <utility> |
| -#include <vector> |
| -#include "base/format_macros.h" |
| -#include "base/logging.h" |
| -#include "base/macros.h" |
| -#include "base/strings/stringprintf.h" |
| -#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" |
| -#include "base/trace_event/heap_profiler_type_name_deduplicator.h" |
| +#include "base/containers/hash_tables.h" |
| +#include "base/hash.h" |
| +#include "base/memory/ptr_util.h" |
| +#include "base/numerics/safe_conversions.h" |
| +#include "base/trace_event/heap_profiler_allocation_register.h" |
| #include "base/trace_event/memory_dump_session_state.h" |
| -#include "base/trace_event/trace_config.h" |
| #include "base/trace_event/trace_event_argument.h" |
| -#include "base/trace_event/trace_log.h" |
| - |
| -// Most of what the |HeapDumpWriter| does is aggregating detailed information |
| -// about the heap and deciding what to dump. The input to this process is a list |
| -// of |AllocationContext|s and size pairs. |
| -// |
| -// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size) |
| -// pairs where the properties of the contexts share a prefix. (Type name is |
| -// considered a list of length one here.) First all pairs are put into one |
| -// bucket that represents the entire heap. Then this bucket is recursively |
| -// broken down into smaller buckets. Each bucket keeps track of whether further |
| -// breakdown is possible. |
| namespace base { |
| namespace trace_event { |
| -namespace internal { |
| -namespace { |
| - |
| -// Denotes a property of |AllocationContext| to break down by. |
| -enum class BreakDownMode { kByBacktrace, kByTypeName }; |
| - |
| -// A group of bytes for which the context shares a prefix. |
| -struct Bucket { |
| - Bucket() |
| - : size(0), |
| - count(0), |
| - backtrace_cursor(0), |
| - is_broken_down_by_type_name(false) {} |
| - |
| - std::vector<std::pair<const AllocationContext*, AllocationMetrics>> |
| - metrics_by_context; |
| - // The sum of the sizes of |metrics_by_context|. |
| - size_t size; |
| - |
| - // The total number of allocations in |metrics_by_context|. |
| - size_t count; |
| - |
| - // The index of the next stack frame to break down by. For all elements in |
| - // this bucket, the stack frames 0 up to (but not including) the cursor |
| - // must be equal. |
| - size_t backtrace_cursor; |
| - |
| - // When true, the type name for all elements in this bucket must be equal. |
| - bool is_broken_down_by_type_name; |
| -}; |
| - |
| -// Comparison operator to order buckets by their size. |
| -bool operator<(const Bucket& lhs, const Bucket& rhs) { |
| - return lhs.size < rhs.size; |
| -} |
| - |
| -// Groups the allocations in the bucket by |break_by|. The buckets in the |
| -// returned list will have |backtrace_cursor| advanced or |
| -// |is_broken_down_by_type_name| set depending on the property to group by. |
| -std::vector<Bucket> GetSubbuckets(const Bucket& bucket, |
| - BreakDownMode break_by) { |
| - base::hash_map<const void*, Bucket> breakdown; |
| - |
| - |
| - if (break_by == BreakDownMode::kByBacktrace) { |
| - for (const auto& context_and_metrics : bucket.metrics_by_context) { |
| - const Backtrace& backtrace = context_and_metrics.first->backtrace; |
| - const StackFrame* begin = std::begin(backtrace.frames); |
| - const StackFrame* end = begin + backtrace.frame_count; |
| - const StackFrame* cursor = begin + bucket.backtrace_cursor; |
| +namespace { |
| - DCHECK_LE(cursor, end); |
| +struct AggregationKey { |
| + int backtrace_id; |
| + int type_id; |
| - if (cursor != end) { |
| - Bucket& subbucket = breakdown[cursor->value]; |
| - subbucket.size += context_and_metrics.second.size; |
| - subbucket.count += context_and_metrics.second.count; |
| - subbucket.metrics_by_context.push_back(context_and_metrics); |
| - subbucket.backtrace_cursor = bucket.backtrace_cursor + 1; |
| - subbucket.is_broken_down_by_type_name = |
| - bucket.is_broken_down_by_type_name; |
| - DCHECK_GT(subbucket.size, 0u); |
| - DCHECK_GT(subbucket.count, 0u); |
| - } |
| + struct Hasher { |
| + size_t operator()(const AggregationKey& key) const { |
| + return base::HashInts(key.backtrace_id, key.type_id); |
| } |
| - } else if (break_by == BreakDownMode::kByTypeName) { |
| - if (!bucket.is_broken_down_by_type_name) { |
| - for (const auto& context_and_metrics : bucket.metrics_by_context) { |
| - const AllocationContext* context = context_and_metrics.first; |
| - Bucket& subbucket = breakdown[context->type_name]; |
| - subbucket.size += context_and_metrics.second.size; |
| - subbucket.count += context_and_metrics.second.count; |
| - subbucket.metrics_by_context.push_back(context_and_metrics); |
| - subbucket.backtrace_cursor = bucket.backtrace_cursor; |
| - subbucket.is_broken_down_by_type_name = true; |
| - DCHECK_GT(subbucket.size, 0u); |
| - DCHECK_GT(subbucket.count, 0u); |
| - } |
| - } |
| - } |
| + }; |
| - std::vector<Bucket> buckets; |
| - buckets.reserve(breakdown.size()); |
| - for (auto key_bucket : breakdown) |
| - buckets.push_back(key_bucket.second); |
| - |
| - return buckets; |
| -} |
| - |
| -// Breaks down the bucket by |break_by|. Returns only buckets that contribute |
| -// more than |min_size_bytes| to the total size. The long tail is omitted. |
| -std::vector<Bucket> BreakDownBy(const Bucket& bucket, |
| - BreakDownMode break_by, |
| - size_t min_size_bytes) { |
| - std::vector<Bucket> buckets = GetSubbuckets(bucket, break_by); |
| - |
| - // Ensure that |buckets| is a max-heap (the data structure, not memory heap), |
| - // so its front contains the largest bucket. Buckets should be iterated |
| - // ordered by size, but sorting the vector is overkill because the long tail |
| - // of small buckets will be discarded. By using a max-heap, the optimal case |
| - // where all but the first bucket are discarded is O(n). The worst case where |
| - // no bucket is discarded is doing a heap sort, which is O(n log n). |
| - std::make_heap(buckets.begin(), buckets.end()); |
| - |
| - // Keep including buckets as long as the largest remaining bucket |
| - // contributes at least |min_size_bytes|. The large buckets end up in |
| - // [it, end()); [begin(), it) is the part that still contains the max-heap |
| - // of small buckets. |
| - std::vector<Bucket>::iterator it; |
| - for (it = buckets.end(); it != buckets.begin(); --it) { |
| - if (buckets.front().size < min_size_bytes) |
| - break; |
| - |
| - // Put the largest bucket in [begin, it) at |it - 1| and max-heapify |
| - // [begin, it - 1). This puts the next largest bucket at |buckets.front()|. |
| - std::pop_heap(buckets.begin(), it); |
| + bool operator==(const AggregationKey& other) const { |
| + return backtrace_id == other.backtrace_id && type_id == other.type_id; |
| } |
| - |
| - // At this point, |buckets| looks like this (numbers are bucket sizes): |
| - // |
| - //   <-- max-heap of small buckets --->                |
| - //                                   <-- large buckets by ascending size --> |
| - // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ] |
| - //   ^                               ^              ^ |
| - //   |                               |              | |
| - //   begin()                         it             end() |
| - |
| - // Discard the long tail of buckets that each contribute less than |
| - // |min_size_bytes|. |
| - buckets.erase(buckets.begin(), it); |
| - |
| - return buckets; |
| -} |
| +}; |
| } // namespace |
| -bool operator<(Entry lhs, Entry rhs) { |
| - // There is no need to compare |size|. If the backtrace and type name are |
| - // equal then the sizes must be equal as well. |
| - return std::tie(lhs.stack_frame_id, lhs.type_id) < |
| - std::tie(rhs.stack_frame_id, rhs.type_id); |
| -} |
| - |
| -HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, |
| - TypeNameDeduplicator* type_name_deduplicator, |
| - uint32_t breakdown_threshold_bytes) |
| - : stack_frame_deduplicator_(stack_frame_deduplicator), |
| - type_name_deduplicator_(type_name_deduplicator), |
| - breakdown_threshold_bytes_(breakdown_threshold_bytes) { |
| -} |
| - |
| -HeapDumpWriter::~HeapDumpWriter() {} |
| - |
| -bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) { |
| - // The contexts in the bucket are all different, but the [begin, cursor) range |
| - // is equal for all contexts in the bucket, and the type names are the same if |
| - // |is_broken_down_by_type_name| is set. |
| - DCHECK(!bucket.metrics_by_context.empty()); |
| - |
| - const AllocationContext* context = bucket.metrics_by_context.front().first; |
| - |
| - const StackFrame* backtrace_begin = std::begin(context->backtrace.frames); |
| - const StackFrame* backtrace_end = backtrace_begin + bucket.backtrace_cursor; |
| - DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames)); |
| - |
| - Entry entry; |
| - entry.stack_frame_id = stack_frame_deduplicator_->Insert( |
| - backtrace_begin, backtrace_end); |
| - |
| - // Deduplicate the type name, or use ID -1 if type name is not set. |
| - entry.type_id = bucket.is_broken_down_by_type_name |
| - ? type_name_deduplicator_->Insert(context->type_name) |
| - : -1; |
| - |
| - entry.size = bucket.size; |
| - entry.count = bucket.count; |
| - |
| - auto position_and_inserted = entries_.insert(entry); |
| - return position_and_inserted.second; |
| -} |
| - |
| -void HeapDumpWriter::BreakDown(const Bucket& bucket) { |
| - auto by_backtrace = BreakDownBy(bucket, |
| - BreakDownMode::kByBacktrace, |
| - breakdown_threshold_bytes_); |
| - auto by_type_name = BreakDownBy(bucket, |
| - BreakDownMode::kByTypeName, |
| - breakdown_threshold_bytes_); |
| - |
| - // Insert entries for the buckets. If a bucket was not present before, it has |
| - // not been broken down before, so recursively continue breaking down in that |
| - // case. There might be multiple routes to the same entry (first break down |
| - // by type name, then by backtrace, or first by backtrace and then by type), |
| - // so a set is used to avoid dumping and breaking down entries more than once. |
| - |
| - for (const Bucket& subbucket : by_backtrace) |
| - if (AddEntryForBucket(subbucket)) |
| - BreakDown(subbucket); |
| - |
| - for (const Bucket& subbucket : by_type_name) |
| - if (AddEntryForBucket(subbucket)) |
| - BreakDown(subbucket); |
| -} |
| - |
| -const std::set<Entry>& HeapDumpWriter::Summarize( |
| - const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context) { |
| - // Start with one bucket that represents the entire heap. Iterate by |
| - // reference, because the bucket stores pointers to the allocation |
| - // contexts in |metrics_by_context|. |
| - Bucket root_bucket; |
| - for (const auto& context_and_metrics : metrics_by_context) { |
| - DCHECK_GT(context_and_metrics.second.size, 0u); |
| - DCHECK_GT(context_and_metrics.second.count, 0u); |
| - const AllocationContext* context = &context_and_metrics.first; |
| - root_bucket.metrics_by_context.push_back( |
| - std::make_pair(context, context_and_metrics.second)); |
| - root_bucket.size += context_and_metrics.second.size; |
| - root_bucket.count += context_and_metrics.second.count; |
| +std::unique_ptr<TracedValue> ExportHeapDump( |
| + const AllocationRegister& allocation_register, |
| + const MemoryDumpSessionState& session_state) { |
| + hash_map<AggregationKey, AllocationMetrics, AggregationKey::Hasher> |
|
Primiano Tucci (use gerrit)  2017/02/17 17:07:05
note: at some point we should fix the deduplicator
DmitrySkiba  2017/02/23 07:17:19
Acknowledged.
|
| + metrics_by_key; |
| + for (const auto& allocation : allocation_register) { |
|
Primiano Tucci (use gerrit)  2017/02/17 17:07:05
would be very nice if here there was a comment wit
DmitrySkiba  2017/02/23 07:17:19
Hmm, we too many docs about new format. Maybe let'
|
| + int backtrace_id = session_state.stack_frame_deduplicator()->Insert( |
| + std::begin(allocation.context.backtrace.frames), |
| + std::begin(allocation.context.backtrace.frames) + |
| + allocation.context.backtrace.frame_count); |
| + |
| + int type_id = session_state.type_name_deduplicator()->Insert( |
| + allocation.context.type_name); |
| + |
| + AggregationKey key = {backtrace_id, type_id}; |
| + AllocationMetrics& metrics = metrics_by_key[key]; |
| + metrics.size += allocation.size; |
| + metrics.count += 1; |
| } |
| - AddEntryForBucket(root_bucket); |
| - |
| - // Recursively break down the heap and fill |entries_| with entries to dump. |
| - BreakDown(root_bucket); |
| + auto traced_value = MakeUnique<TracedValue>(); |
| - return entries_; |
| -} |
| - |
| -std::unique_ptr<TracedValue> Serialize(const std::set<Entry>& entries) { |
| - std::string buffer; |
| - std::unique_ptr<TracedValue> traced_value(new TracedValue); |
| - |
| - traced_value->BeginArray("entries"); |
| - |
| - for (const Entry& entry : entries) { |
| - traced_value->BeginDictionary(); |
| - |
| - // Format size as hexadecimal string into |buffer|. |
| - SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size)); |
| - traced_value->SetString("size", buffer); |
| - |
| - SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.count)); |
| - traced_value->SetString("count", buffer); |
| + traced_value->BeginArray("nodes"); |
| + for (const auto& key_and_metrics : metrics_by_key) { |
|
Primiano Tucci (use gerrit)  2017/02/17 17:07:05
I think the rest of the code in this folder doesn'
DmitrySkiba  2017/02/23 07:17:19
Done.
|
| + traced_value->AppendInteger(key_and_metrics.first.backtrace_id); |
| + } |
| + traced_value->EndArray(); |
| - if (entry.stack_frame_id == -1) { |
| - // An empty backtrace (which will have ID -1) is represented by the empty |
| - // string, because there is no leaf frame to reference in |stackFrames|. |
| - traced_value->SetString("bt", ""); |
| - } else { |
| - // Format index of the leaf frame as a string, because |stackFrames| is a |
| - // dictionary, not an array. |
| - SStringPrintf(&buffer, "%i", entry.stack_frame_id); |
| - traced_value->SetString("bt", buffer); |
| - } |
| + traced_value->BeginArray("types"); |
| + for (const auto& key_and_metrics : metrics_by_key) { |
| + traced_value->AppendInteger(key_and_metrics.first.type_id); |
| + } |
| + traced_value->EndArray(); |
| - // Type ID -1 (cumulative size for all types) is represented by the absence |
| - // of the "type" key in the dictionary. |
| - if (entry.type_id != -1) { |
| - // Format the type ID as a string. |
| - SStringPrintf(&buffer, "%i", entry.type_id); |
| - traced_value->SetString("type", buffer); |
| - } |
| + traced_value->BeginArray("counts"); |
| + for (const auto& key_and_metrics : metrics_by_key) { |
| + traced_value->AppendInteger( |
| + saturated_cast<int>(key_and_metrics.second.count)); |
| + } |
| + traced_value->EndArray(); |
| - traced_value->EndDictionary(); |
| + traced_value->BeginArray("sizes"); |
| + for (const auto& key_and_metrics : metrics_by_key) { |
| + traced_value->AppendInteger( |
| + saturated_cast<int>(key_and_metrics.second.size)); |
| } |
| + traced_value->EndArray(); |
| - traced_value->EndArray(); // "entries" |
| return traced_value; |
| } |
| -} // namespace internal |
| - |
| -std::unique_ptr<TracedValue> ExportHeapDump( |
| - const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context, |
| - const MemoryDumpSessionState& session_state) { |
| - internal::HeapDumpWriter writer( |
| - session_state.stack_frame_deduplicator(), |
| - session_state.type_name_deduplicator(), |
| - session_state.memory_dump_config().heap_profiler_options |
| - .breakdown_threshold_bytes); |
| - return Serialize(writer.Summarize(metrics_by_context)); |
| -} |
| - |
| } // namespace trace_event |
| } // namespace base |
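
For readers skimming the CL: the new ExportHeapDump above replaces the recursive bucket breakdown with a single pass that folds every allocation into a map keyed by (backtrace_id, type_id). Below is a minimal, self-contained sketch of that aggregation step, not the CL's code: it assumes the two ids have already been produced by the deduplicators, substitutes std::unordered_map and a hand-rolled hash combine for Chromium's base::hash_map and base::HashInts, and uses a stand-in Allocation struct rather than the real AllocationRegister entry.

  #include <stddef.h>

  #include <functional>
  #include <unordered_map>
  #include <vector>

  // Key identifying one (backtrace, type) pair after deduplication.
  struct AggregationKey {
    int backtrace_id;
    int type_id;

    bool operator==(const AggregationKey& other) const {
      return backtrace_id == other.backtrace_id && type_id == other.type_id;
    }

    struct Hasher {
      size_t operator()(const AggregationKey& key) const {
        // Stand-in for base::HashInts(): boost-style combine of the two ids.
        size_t h = std::hash<int>()(key.backtrace_id);
        return h ^ (std::hash<int>()(key.type_id) + 0x9e3779b9 +
                    (h << 6) + (h >> 2));
      }
    };
  };

  // Accumulated bytes and allocation count for one key.
  struct AllocationMetrics {
    size_t size = 0;
    size_t count = 0;
  };

  // Stand-in for one entry of the allocation register.
  struct Allocation {
    int backtrace_id;
    int type_id;
    size_t size;
  };

  using MetricsByKey =
      std::unordered_map<AggregationKey, AllocationMetrics,
                         AggregationKey::Hasher>;

  MetricsByKey Aggregate(const std::vector<Allocation>& allocations) {
    MetricsByKey metrics_by_key;
    for (const Allocation& allocation : allocations) {
      AllocationMetrics& metrics =
          metrics_by_key[{allocation.backtrace_id, allocation.type_id}];
      metrics.size += allocation.size;  // Total bytes for this key.
      metrics.count += 1;               // Number of allocations for this key.
    }
    return metrics_by_key;
  }

In the CL itself the aggregated metrics are then serialized as four parallel arrays ("nodes", "types", "counts", "sizes"), one element per key and all iterated in the same map order, instead of the per-entry dictionaries the old Serialize() produced.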
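
The removed BreakDownBy() leans on std::make_heap/std::pop_heap to peel off only the buckets above the threshold instead of sorting the whole vector. Here is a reduced sketch of the same pattern over plain sizes; the names LargeOnly and min_size are illustrative and do not come from the original code.

  #include <stddef.h>

  #include <algorithm>
  #include <vector>

  // Returns the elements of |sizes| that are at least |min_size|, largest
  // first, without fully sorting the input. pop_heap moves the current
  // maximum to the back of the range, so large elements accumulate at the
  // end while the small ones stay in the shrinking heap and are discarded.
  std::vector<size_t> LargeOnly(std::vector<size_t> sizes, size_t min_size) {
    std::make_heap(sizes.begin(), sizes.end());  // front() is the maximum.

    auto it = sizes.end();
    while (it != sizes.begin() && sizes.front() >= min_size) {
      std::pop_heap(sizes.begin(), it);  // Largest of [begin, it) -> it - 1.
      --it;
    }

    // [begin, it) is the long tail of small elements; [it, end()) holds the
    // large ones in ascending order.
    sizes.erase(sizes.begin(), it);
    std::reverse(sizes.begin(), sizes.end());  // Largest first.
    return sizes;
  }

Within this CL that whole mechanism goes away, since the single-pass aggregation already yields exactly one entry per (backtrace, type) pair.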