Chromium Code Reviews
| Index: base/trace_event/heap_profiler_heap_dump_writer.cc |
| diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc |
| index 1bf06dbd9763e706d2e7da4ae5933d16baf994ee..a35e01c4b08e6caf5017080e9063d71615f6ba8e 100644 |
| --- a/base/trace_event/heap_profiler_heap_dump_writer.cc |
| +++ b/base/trace_event/heap_profiler_heap_dump_writer.cc |
| @@ -6,318 +6,88 @@ |
| #include <stdint.h> |
| -#include <algorithm> |
| -#include <iterator> |
| #include <tuple> |
| -#include <utility> |
| -#include <vector> |
| -#include "base/format_macros.h" |
| -#include "base/logging.h" |
| -#include "base/macros.h" |
| -#include "base/strings/stringprintf.h" |
| -#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" |
| -#include "base/trace_event/heap_profiler_type_name_deduplicator.h" |
| +#include "base/containers/hash_tables.h" |
| +#include "base/hash.h" |
| +#include "base/memory/ptr_util.h" |
| +#include "base/numerics/safe_conversions.h" |
| +#include "base/trace_event/heap_profiler_allocation_register.h" |
| #include "base/trace_event/memory_dump_session_state.h" |
| -#include "base/trace_event/trace_config.h" |
| #include "base/trace_event/trace_event_argument.h" |
| -#include "base/trace_event/trace_log.h" |
| - |
| -// Most of what the |HeapDumpWriter| does is aggregating detailed information |
| -// about the heap and deciding what to dump. The input to this process is a list |
| -// of |AllocationContext|s and size pairs. |
| -// |
| -// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size) |
| -// pairs where the properties of the contexts share a prefix. (Type name is |
| -// considered a list of length one here.) First all pairs are put into one |
| -// bucket that represents the entire heap. Then this bucket is recursively |
| -// broken down into smaller buckets. Each bucket keeps track of whether further |
| -// breakdown is possible. |
| namespace base { |
| namespace trace_event { |
| -namespace internal { |
| -namespace { |
| - |
| -// Denotes a property of |AllocationContext| to break down by. |
| -enum class BreakDownMode { kByBacktrace, kByTypeName }; |
| - |
| -// A group of bytes for which the context shares a prefix. |
| -struct Bucket { |
| - Bucket() |
| - : size(0), |
| - count(0), |
| - backtrace_cursor(0), |
| - is_broken_down_by_type_name(false) {} |
| - |
| - std::vector<std::pair<const AllocationContext*, AllocationMetrics>> |
| - metrics_by_context; |
| - // The sum of the sizes of |metrics_by_context|. |
| - size_t size; |
| - |
| - // The total number of allocations in |metrics_by_context|. |
| - size_t count; |
| - |
| - // The index of the next stack frame to break down by. For all elements in |
| - // this bucket, the stack frames 0 up to (but not including) the cursor |
| - // must be equal. |
| - size_t backtrace_cursor; |
| - |
| - // When true, the type name for all elements in this bucket must be equal. |
| - bool is_broken_down_by_type_name; |
| -}; |
| - |
| -// Comparison operator to order buckets by their size. |
| -bool operator<(const Bucket& lhs, const Bucket& rhs) { |
| - return lhs.size < rhs.size; |
| -} |
| - |
| -// Groups the allocations in the bucket by |break_by|. The buckets in the |
| -// returned list will have |backtrace_cursor| advanced or |
| -// |is_broken_down_by_type_name| set depending on the property to group by. |
| -std::vector<Bucket> GetSubbuckets(const Bucket& bucket, |
| - BreakDownMode break_by) { |
| - base::hash_map<const void*, Bucket> breakdown; |
| - |
| - |
| - if (break_by == BreakDownMode::kByBacktrace) { |
| - for (const auto& context_and_metrics : bucket.metrics_by_context) { |
| - const Backtrace& backtrace = context_and_metrics.first->backtrace; |
| - const StackFrame* begin = std::begin(backtrace.frames); |
| - const StackFrame* end = begin + backtrace.frame_count; |
| - const StackFrame* cursor = begin + bucket.backtrace_cursor; |
| +namespace { |
| - DCHECK_LE(cursor, end); |
| +struct AggregationKey { |
| + int backtrace_id; |
| + int type_id; |
| - if (cursor != end) { |
| - Bucket& subbucket = breakdown[cursor->value]; |
| - subbucket.size += context_and_metrics.second.size; |
| - subbucket.count += context_and_metrics.second.count; |
| - subbucket.metrics_by_context.push_back(context_and_metrics); |
| - subbucket.backtrace_cursor = bucket.backtrace_cursor + 1; |
| - subbucket.is_broken_down_by_type_name = |
| - bucket.is_broken_down_by_type_name; |
| - DCHECK_GT(subbucket.size, 0u); |
| - DCHECK_GT(subbucket.count, 0u); |
| - } |
| + struct Hasher { |
| + size_t operator()(const AggregationKey& key) const { |
| + return base::HashInts(key.backtrace_id, key.type_id); |
| } |
| - } else if (break_by == BreakDownMode::kByTypeName) { |
| - if (!bucket.is_broken_down_by_type_name) { |
| - for (const auto& context_and_metrics : bucket.metrics_by_context) { |
| - const AllocationContext* context = context_and_metrics.first; |
| - Bucket& subbucket = breakdown[context->type_name]; |
| - subbucket.size += context_and_metrics.second.size; |
| - subbucket.count += context_and_metrics.second.count; |
| - subbucket.metrics_by_context.push_back(context_and_metrics); |
| - subbucket.backtrace_cursor = bucket.backtrace_cursor; |
| - subbucket.is_broken_down_by_type_name = true; |
| - DCHECK_GT(subbucket.size, 0u); |
| - DCHECK_GT(subbucket.count, 0u); |
| - } |
| - } |
| - } |
| + }; |
| - std::vector<Bucket> buckets; |
| - buckets.reserve(breakdown.size()); |
| - for (auto key_bucket : breakdown) |
| - buckets.push_back(key_bucket.second); |
| - |
| - return buckets; |
| -} |
| - |
| -// Breaks down the bucket by |break_by|. Returns only buckets that contribute |
| -// more than |min_size_bytes| to the total size. The long tail is omitted. |
| -std::vector<Bucket> BreakDownBy(const Bucket& bucket, |
| - BreakDownMode break_by, |
| - size_t min_size_bytes) { |
| - std::vector<Bucket> buckets = GetSubbuckets(bucket, break_by); |
| - |
| - // Ensure that |buckets| is a max-heap (the data structure, not memory heap), |
| - // so its front contains the largest bucket. Buckets should be iterated |
| - // ordered by size, but sorting the vector is overkill because the long tail |
| - // of small buckets will be discarded. By using a max-heap, the optimal case |
| - // where all but the first bucket are discarded is O(n). The worst case where |
| - // no bucket is discarded is doing a heap sort, which is O(n log n). |
| - std::make_heap(buckets.begin(), buckets.end()); |
| - |
| - // Keep including buckets as long as the largest remaining bucket |
| - // contributes at least |min_size_bytes|. The large buckets end up in |
| - // [it, end()); [begin(), it) is the part that still contains the max-heap |
| - // of small buckets. |
| - std::vector<Bucket>::iterator it; |
| - for (it = buckets.end(); it != buckets.begin(); --it) { |
| - if (buckets.front().size < min_size_bytes) |
| - break; |
| - |
| - // Put the largest bucket in [begin, it) at |it - 1| and max-heapify |
| - // [begin, it - 1). This puts the next largest bucket at |buckets.front()|. |
| - std::pop_heap(buckets.begin(), it); |
| + bool operator==(const AggregationKey& other) const { |
| + return backtrace_id == other.backtrace_id && type_id == other.type_id; |
| } |
| - |
| - // At this point, |buckets| looks like this (numbers are bucket sizes): |
| - // |
| - //   <-- max-heap of small buckets --->                |
| - //                                   <-- large buckets by ascending size --> |
| - // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ] |
| - //   ^                               ^              ^ |
| - //   |                               |              | |
| - //   begin()                         it             end() |
| - |
| - // Discard the long tail of buckets that each contribute less than |
| - // |min_size_bytes|. |
| - buckets.erase(buckets.begin(), it); |
| - |
| - return buckets; |
| -} |
| +}; |
| } // namespace |
| -bool operator<(Entry lhs, Entry rhs) { |
| - // There is no need to compare |size|. If the backtrace and type name are |
| - // equal then the sizes must be equal as well. |
| - return std::tie(lhs.stack_frame_id, lhs.type_id) < |
| - std::tie(rhs.stack_frame_id, rhs.type_id); |
| -} |
| - |
| -HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, |
| - TypeNameDeduplicator* type_name_deduplicator, |
| - uint32_t breakdown_threshold_bytes) |
| - : stack_frame_deduplicator_(stack_frame_deduplicator), |
| - type_name_deduplicator_(type_name_deduplicator), |
| - breakdown_threshold_bytes_(breakdown_threshold_bytes) { |
| -} |
| - |
| -HeapDumpWriter::~HeapDumpWriter() {} |
| - |
| -bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) { |
| - // The contexts in the bucket are all different, but the [begin, cursor) range |
| - // is equal for all contexts in the bucket, and the type names are the same if |
| - // |is_broken_down_by_type_name| is set. |
| - DCHECK(!bucket.metrics_by_context.empty()); |
| - |
| - const AllocationContext* context = bucket.metrics_by_context.front().first; |
| - |
| - const StackFrame* backtrace_begin = std::begin(context->backtrace.frames); |
| - const StackFrame* backtrace_end = backtrace_begin + bucket.backtrace_cursor; |
| - DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames)); |
| - |
| - Entry entry; |
| - entry.stack_frame_id = stack_frame_deduplicator_->Insert( |
| - backtrace_begin, backtrace_end); |
| - |
| - // Deduplicate the type name, or use ID -1 if type name is not set. |
| - entry.type_id = bucket.is_broken_down_by_type_name |
| - ? type_name_deduplicator_->Insert(context->type_name) |
| - : -1; |
| - |
| - entry.size = bucket.size; |
| - entry.count = bucket.count; |
| - |
| - auto position_and_inserted = entries_.insert(entry); |
| - return position_and_inserted.second; |
| -} |
| - |
| -void HeapDumpWriter::BreakDown(const Bucket& bucket) { |
| - auto by_backtrace = BreakDownBy(bucket, |
| - BreakDownMode::kByBacktrace, |
| - breakdown_threshold_bytes_); |
| - auto by_type_name = BreakDownBy(bucket, |
| - BreakDownMode::kByTypeName, |
| - breakdown_threshold_bytes_); |
| - |
| - // Insert entries for the buckets. If a bucket was not present before, it has |
| - // not been broken down before, so recursively continue breaking down in that |
| - // case. There might be multiple routes to the same entry (first break down |
| - // by type name, then by backtrace, or first by backtrace and then by type), |
| - // so a set is used to avoid dumping and breaking down entries more than once. |
| - |
| - for (const Bucket& subbucket : by_backtrace) |
| - if (AddEntryForBucket(subbucket)) |
| - BreakDown(subbucket); |
| - |
| - for (const Bucket& subbucket : by_type_name) |
| - if (AddEntryForBucket(subbucket)) |
| - BreakDown(subbucket); |
| -} |
| - |
| -const std::set<Entry>& HeapDumpWriter::Summarize( |
| - const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context) { |
| - // Start with one bucket that represents the entire heap. Iterate by |
| - // reference, because the bucket stores pointers to the allocation |
| - // contexts in |metrics_by_context|. |
| - Bucket root_bucket; |
| - for (const auto& context_and_metrics : metrics_by_context) { |
| - DCHECK_GT(context_and_metrics.second.size, 0u); |
| - DCHECK_GT(context_and_metrics.second.count, 0u); |
| - const AllocationContext* context = &context_and_metrics.first; |
| - root_bucket.metrics_by_context.push_back( |
| - std::make_pair(context, context_and_metrics.second)); |
| - root_bucket.size += context_and_metrics.second.size; |
| - root_bucket.count += context_and_metrics.second.count; |
| +std::unique_ptr<TracedValue> ExportHeapDump( |
| + const AllocationRegister& allocation_register, |
| + const MemoryDumpSessionState& session_state) { |
| + hash_map<AggregationKey, AllocationMetrics, AggregationKey::Hasher> |
|
Primiano Tucci (use gerrit)  2017/02/17 17:07:05
note: at some point we should fix the deduplicator
DmitrySkiba  2017/02/23 07:17:19
Acknowledged.
|
| + metrics_by_key; |
| + for (const auto& allocation : allocation_register) { |
|
Primiano Tucci (use gerrit)  2017/02/17 17:07:05
would be very nice if here there was a comment wit
DmitrySkiba  2017/02/23 07:17:19
Hmm, we too many docs about new format. Maybe let'
|
| + int backtrace_id = session_state.stack_frame_deduplicator()->Insert( |
| + std::begin(allocation.context.backtrace.frames), |
| + std::begin(allocation.context.backtrace.frames) + |
| + allocation.context.backtrace.frame_count); |
| + |
| + int type_id = session_state.type_name_deduplicator()->Insert( |
| + allocation.context.type_name); |
| + |
| + AggregationKey key = {backtrace_id, type_id}; |
| + AllocationMetrics& metrics = metrics_by_key[key]; |
| + metrics.size += allocation.size; |
| + metrics.count += 1; |
| } |
| - AddEntryForBucket(root_bucket); |
| - |
| - // Recursively break down the heap and fill |entries_| with entries to dump. |
| - BreakDown(root_bucket); |
| + auto traced_value = MakeUnique<TracedValue>(); |
| - return entries_; |
| -} |
| - |
| -std::unique_ptr<TracedValue> Serialize(const std::set<Entry>& entries) { |
| - std::string buffer; |
| - std::unique_ptr<TracedValue> traced_value(new TracedValue); |
| - |
| - traced_value->BeginArray("entries"); |
| - |
| - for (const Entry& entry : entries) { |
| - traced_value->BeginDictionary(); |
| - |
| - // Format size as hexadecimal string into |buffer|. |
| - SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size)); |
| - traced_value->SetString("size", buffer); |
| - |
| - SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.count)); |
| - traced_value->SetString("count", buffer); |
| + traced_value->BeginArray("nodes"); |
| + for (const auto& key_and_metrics : metrics_by_key) { |
|
Primiano Tucci (use gerrit)  2017/02/17 17:07:05
I think the rest of the code in this folder doesn'
DmitrySkiba  2017/02/23 07:17:19
Done.
|
| + traced_value->AppendInteger(key_and_metrics.first.backtrace_id); |
| + } |
| + traced_value->EndArray(); |
| - if (entry.stack_frame_id == -1) { |
| - // An empty backtrace (which will have ID -1) is represented by the empty |
| - // string, because there is no leaf frame to reference in |stackFrames|. |
| - traced_value->SetString("bt", ""); |
| - } else { |
| - // Format index of the leaf frame as a string, because |stackFrames| is a |
| - // dictionary, not an array. |
| - SStringPrintf(&buffer, "%i", entry.stack_frame_id); |
| - traced_value->SetString("bt", buffer); |
| - } |
| + traced_value->BeginArray("types"); |
| + for (const auto& key_and_metrics : metrics_by_key) { |
| + traced_value->AppendInteger(key_and_metrics.first.type_id); |
| + } |
| + traced_value->EndArray(); |
| - // Type ID -1 (cumulative size for all types) is represented by the absence |
| - // of the "type" key in the dictionary. |
| - if (entry.type_id != -1) { |
| - // Format the type ID as a string. |
| - SStringPrintf(&buffer, "%i", entry.type_id); |
| - traced_value->SetString("type", buffer); |
| - } |
| + traced_value->BeginArray("counts"); |
| + for (const auto& key_and_metrics : metrics_by_key) { |
| + traced_value->AppendInteger( |
| + saturated_cast<int>(key_and_metrics.second.count)); |
| + } |
| + traced_value->EndArray(); |
| - traced_value->EndDictionary(); |
| + traced_value->BeginArray("sizes"); |
| + for (const auto& key_and_metrics : metrics_by_key) { |
| + traced_value->AppendInteger( |
| + saturated_cast<int>(key_and_metrics.second.size)); |
| } |
| + traced_value->EndArray(); |
| - traced_value->EndArray(); // "entries" |
| return traced_value; |
| } |
| -} // namespace internal |
| - |
| -std::unique_ptr<TracedValue> ExportHeapDump( |
| - const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context, |
| - const MemoryDumpSessionState& session_state) { |
| - internal::HeapDumpWriter writer( |
| - session_state.stack_frame_deduplicator(), |
| - session_state.type_name_deduplicator(), |
| - session_state.memory_dump_config().heap_profiler_options |
| - .breakdown_threshold_bytes); |
| - return Serialize(writer.Summarize(metrics_by_context)); |
| -} |
| - |
| } // namespace trace_event |
| } // namespace base |
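
For readers skimming the CL: the new ExportHeapDump above replaces the recursive bucket breakdown with a single pass that folds every allocation into a map keyed by (backtrace_id, type_id). Below is a minimal, self-contained sketch of that aggregation step, not the CL's code: it assumes the two ids have already been produced by the deduplicators, substitutes std::unordered_map and a hand-rolled hash combine for Chromium's base::hash_map and base::HashInts, and uses a stand-in Allocation struct rather than the real AllocationRegister entry.

  #include <stddef.h>

  #include <functional>
  #include <unordered_map>
  #include <vector>

  // Key identifying one (backtrace, type) pair after deduplication.
  struct AggregationKey {
    int backtrace_id;
    int type_id;

    bool operator==(const AggregationKey& other) const {
      return backtrace_id == other.backtrace_id && type_id == other.type_id;
    }

    struct Hasher {
      size_t operator()(const AggregationKey& key) const {
        // Stand-in for base::HashInts(): boost-style combine of the two ids.
        size_t h = std::hash<int>()(key.backtrace_id);
        return h ^ (std::hash<int>()(key.type_id) + 0x9e3779b9 +
                    (h << 6) + (h >> 2));
      }
    };
  };

  // Accumulated bytes and allocation count for one key.
  struct AllocationMetrics {
    size_t size = 0;
    size_t count = 0;
  };

  // Stand-in for one entry of the allocation register.
  struct Allocation {
    int backtrace_id;
    int type_id;
    size_t size;
  };

  using MetricsByKey =
      std::unordered_map<AggregationKey, AllocationMetrics,
                         AggregationKey::Hasher>;

  MetricsByKey Aggregate(const std::vector<Allocation>& allocations) {
    MetricsByKey metrics_by_key;
    for (const Allocation& allocation : allocations) {
      AllocationMetrics& metrics =
          metrics_by_key[{allocation.backtrace_id, allocation.type_id}];
      metrics.size += allocation.size;  // Total bytes for this key.
      metrics.count += 1;               // Number of allocations for this key.
    }
    return metrics_by_key;
  }

In the CL itself the aggregated metrics are then serialized as four parallel arrays ("nodes", "types", "counts", "sizes"), one element per key and all iterated in the same map order, instead of the per-entry dictionaries the old Serialize() produced.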
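
The removed BreakDownBy() leans on std::make_heap/std::pop_heap to peel off only the buckets above the threshold instead of sorting the whole vector. Here is a reduced sketch of the same pattern over plain sizes; the names LargeOnly and min_size are illustrative and do not come from the original code.

  #include <stddef.h>

  #include <algorithm>
  #include <vector>

  // Returns the elements of |sizes| that are at least |min_size|, largest
  // first, without fully sorting the input. pop_heap moves the current
  // maximum to the back of the range, so large elements accumulate at the
  // end while the small ones stay in the shrinking heap and are discarded.
  std::vector<size_t> LargeOnly(std::vector<size_t> sizes, size_t min_size) {
    std::make_heap(sizes.begin(), sizes.end());  // front() is the maximum.

    auto it = sizes.end();
    while (it != sizes.begin() && sizes.front() >= min_size) {
      std::pop_heap(sizes.begin(), it);  // Largest of [begin, it) -> it - 1.
      --it;
    }

    // [begin, it) is the long tail of small elements; [it, end()) holds the
    // large ones in ascending order.
    sizes.erase(sizes.begin(), it);
    std::reverse(sizes.begin(), sizes.end());  // Largest first.
    return sizes;
  }

Within this CL that whole mechanism goes away, since the single-pass aggregation already yields exactly one entry per (backtrace, type) pair.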