OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/trace_event/heap_profiler_heap_dump_writer.h" | 5 #include "base/trace_event/heap_profiler_heap_dump_writer.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
| 9 #include <tuple> |
9 #include <utility> | 10 #include <utility> |
10 #include <vector> | 11 #include <vector> |
11 | 12 |
12 #include "base/format_macros.h" | 13 #include "base/format_macros.h" |
| 14 #include "base/logging.h" |
13 #include "base/strings/stringprintf.h" | 15 #include "base/strings/stringprintf.h" |
14 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" | 16 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" |
15 #include "base/trace_event/heap_profiler_type_name_deduplicator.h" | 17 #include "base/trace_event/heap_profiler_type_name_deduplicator.h" |
16 #include "base/trace_event/trace_event_argument.h" | 18 #include "base/trace_event/trace_event_argument.h" |
17 | 19 |
| 20 // Most of what the |HeapDumpWriter| does is aggregating detailed information |
 | 21 // about the heap and deciding what to dump. The input to this process is a |
 | 22 // list of (|AllocationContext|, size) pairs. |
| 23 // |
| 24 // The pairs are grouped into |Bucket|s. A bucket is a group of (context, size) |
| 25 // pairs where the properties of the contexts share a prefix. (Type name is |
| 26 // considered a list of length one here.) First all pairs are put into one |
| 27 // bucket that represents the entire heap. Then this bucket is recursively |
| 28 // broken down into smaller buckets. Each bucket keeps track of whether further |
| 29 // breakdown is possible. |
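 | // |
 | // For example (frame names and sizes below are purely illustrative): |
 | // |
 | //   (entire heap, 2 MiB) |
 | //     broken down by bottom-most frame: (MessageLoop::Run, 1.5 MiB), ... |
 | //     (MessageLoop::Run, 1.5 MiB) broken down by type name: |
 | //       (std::string, 0.5 MiB), ... |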
| 30 |
18 namespace base { | 31 namespace base { |
19 namespace trace_event { | 32 namespace trace_event { |
20 | 33 namespace internal { |
21 namespace { | 34 namespace { |
22 | 35 |
23 template <typename T> | 36 // Denotes a property of |AllocationContext| to break down by. |
24 bool PairSizeGt(const std::pair<T, size_t>& lhs, | 37 enum class BreakDownMode { kByBacktrace, kByTypeName }; |
25 const std::pair<T, size_t>& rhs) { | 38 |
26 return lhs.second > rhs.second; | 39 // A group of bytes for which the allocation contexts share a prefix. |
27 } | 40 struct Bucket { |
28 | 41 Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {} |
29 // Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is | 42 |
30 // ordered from high |size_t| to low |size_t|. | 43 std::vector<std::pair<const AllocationContext*, size_t>> bytes_by_context; |
31 template <typename T> | 44 |
32 std::vector<std::pair<T, size_t>> SortBySizeDescending( | 45 // The sum of the sizes of |bytes_by_context|. |
33 const hash_map<T, size_t>& grouped) { | 46 size_t size; |
34 std::vector<std::pair<T, size_t>> sorted; | 47 |
35 sorted.reserve(grouped.size()); | 48 // The index of the stack frame that has not yet been broken down by. For all |
36 std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted)); | 49 // elements in this bucket, the stack frames 0 up to (but not including) the |
37 std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>); | 50 // cursor must be equal. |
38 return sorted; | 51 size_t backtrace_cursor; |
| 52 |
| 53 // When true, the type name for all elements in this bucket must be equal. |
| 54 bool is_broken_down_by_type_name; |
| 55 }; |
| 56 |
| 57 // Comparison operator to order buckets by their size. |
| 58 bool operator<(const Bucket& lhs, const Bucket& rhs) { |
| 59 return lhs.size < rhs.size; |
| 60 } |
| 61 |
| 62 // Groups the allocations in the bucket by |breakBy|. The buckets in the |
| 63 // returned list will have |backtrace_cursor| advanced or |
| 64 // |is_broken_down_by_type_name| set depending on the property to group by. |
| 65 std::vector<Bucket> GetSubbuckets(const Bucket& bucket, BreakDownMode breakBy) { |
| 66 base::hash_map<const char*, Bucket> breakdown; |
| 67 |
| 68 if (breakBy == BreakDownMode::kByBacktrace) { |
| 69 for (const auto& context_and_size : bucket.bytes_by_context) { |
| 70 const Backtrace& backtrace = context_and_size.first->backtrace; |
| 71 const char* const* begin = std::begin(backtrace.frames); |
| 72 const char* const* end = std::end(backtrace.frames); |
| 73 const char* const* cursor = begin + bucket.backtrace_cursor; |
| 74 |
| 75 // The backtrace in the context is padded with null pointers, but these |
| 76 // should not be considered for breakdown. Adjust end to point past the |
| 77 // last non-null frame. |
| 78 while (begin != end && *(end - 1) == nullptr) |
| 79 end--; |
| 80 |
| 81 DCHECK_LE(cursor, end); |
| 82 |
| 83 if (cursor != end) { |
| 84 Bucket& subbucket = breakdown[*cursor]; |
| 85 subbucket.size += context_and_size.second; |
| 86 subbucket.bytes_by_context.push_back(context_and_size); |
| 87 subbucket.backtrace_cursor = bucket.backtrace_cursor + 1; |
| 88 subbucket.is_broken_down_by_type_name = |
| 89 bucket.is_broken_down_by_type_name; |
| 90 DCHECK_GT(subbucket.size, 0u); |
| 91 } |
| 92 } |
| 93 } else if (breakBy == BreakDownMode::kByTypeName) { |
| 94 if (!bucket.is_broken_down_by_type_name) { |
| 95 for (const auto& context_and_size : bucket.bytes_by_context) { |
| 96 const AllocationContext* context = context_and_size.first; |
| 97 Bucket& subbucket = breakdown[context->type_name]; |
| 98 subbucket.size += context_and_size.second; |
| 99 subbucket.bytes_by_context.push_back(context_and_size); |
| 100 subbucket.backtrace_cursor = bucket.backtrace_cursor; |
| 101 subbucket.is_broken_down_by_type_name = true; |
| 102 DCHECK_GT(subbucket.size, 0u); |
| 103 } |
| 104 } |
| 105 } |
| 106 |
| 107 std::vector<Bucket> buckets; |
| 108 buckets.reserve(breakdown.size()); |
 | 109 for (const auto& key_bucket : breakdown) |
| 110 buckets.push_back(key_bucket.second); |
| 111 |
| 112 return buckets; |
| 113 } |
| 114 |
| 115 // Breaks down the bucket by |breakBy|. Returns only buckets that contribute |
| 116 // significantly to the total size. The long tail is omitted. |
| 117 std::vector<Bucket> BreakDownBy(const Bucket& bucket, BreakDownMode breakBy) { |
| 118 std::vector<Bucket> buckets = GetSubbuckets(bucket, breakBy); |
| 119 |
| 120 // Ensure that |buckets| is a max-heap (the data structure, not memory heap), |
| 121 // so its front contains the largest bucket. Buckets should be iterated |
| 122 // ordered by size, but sorting the vector is overkill because the long tail |
| 123 // of small buckets will be discarded. By using a max-heap, the optimal case |
| 124 // where all but the first bucket are discarded is O(n). The worst case where |
| 125 // no bucket is discarded is doing a heap sort, which is O(n log n). |
| 126 std::make_heap(buckets.begin(), buckets.end()); |
| 127 |
| 128 // Keep including buckets until adding one would increase the number of |
| 129 // bytes accounted for by less than a percent. The large buckets end up in |
 | 130 // [it, end()); [begin(), it) is the part that contains the max-heap of small |
| 131 // buckets. TODO(ruuda): tweak the heuristic. |
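 | // For example (sizes here are purely illustrative): if the two largest |
 | // buckets are 97 and 89 bytes, the 89 byte bucket is kept, because |
 | // 89 * 100 / (97 + 89) = 47 is nonzero; a 1 byte bucket encountered when |
 | // 500 bytes are already accounted for is dropped, because 1 * 100 / 501 = 0. |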
| 132 size_t accounted_for = 0; |
| 133 std::vector<Bucket>::iterator it; |
| 134 for (it = buckets.end(); it != buckets.begin(); --it) { |
| 135 // Compute contribution to number of bytes accounted for in percent, rounded |
| 136 // down due to integer division. Buckets are iterated by descending size, |
| 137 // so later buckets cannot have a larger contribution than this one. |
| 138 accounted_for += buckets.front().size; |
| 139 size_t contribution = buckets.front().size * 100 / accounted_for; |
| 140 if (contribution == 0) |
| 141 break; |
| 142 |
| 143 // Put the largest bucket in [begin, it) at |it - 1| and max-heapify |
| 144 // [begin, it - 1). This puts the next largest bucket at |buckets.front()|. |
| 145 std::pop_heap(buckets.begin(), it); |
| 146 } |
| 147 |
| 148 // At this point, |buckets| looks like this (numbers are bucket sizes): |
| 149 // |
| 150 // <-- max-heap of small buckets ---> |
| 151 // <-- large buckets by ascending size --> |
| 152 // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ] |
| 153 // ^ ^ ^ |
| 154 // | | | |
| 155 // begin() it end() |
| 156 |
| 157 // Discard the long tail of buckets that contribute less than a percent. |
| 158 buckets.erase(buckets.begin(), it); |
| 159 |
| 160 return buckets; |
39 } | 161 } |
40 | 162 |
41 } // namespace | 163 } // namespace |
42 | 164 |
| 165 bool operator<(Entry lhs, Entry rhs) { |
| 166 // There is no need to compare |size|. If the backtrace and type name are |
| 167 // equal then the sizes must be equal as well. |
| 168 return std::tie(lhs.stack_frame_id, lhs.type_id) < |
| 169 std::tie(rhs.stack_frame_id, rhs.type_id); |
| 170 } |
| 171 |
43 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, | 172 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, |
44 TypeNameDeduplicator* type_name_deduplicator) | 173 TypeNameDeduplicator* type_name_deduplicator) |
45 : traced_value_(new TracedValue()), | 174 : stack_frame_deduplicator_(stack_frame_deduplicator), |
46 stack_frame_deduplicator_(stack_frame_deduplicator), | |
47 type_name_deduplicator_(type_name_deduplicator) {} | 175 type_name_deduplicator_(type_name_deduplicator) {} |
48 | 176 |
49 HeapDumpWriter::~HeapDumpWriter() {} | 177 HeapDumpWriter::~HeapDumpWriter() {} |
50 | 178 |
51 void HeapDumpWriter::InsertAllocation(const AllocationContext& context, | 179 bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) { |
52 size_t size) { | 180 // The contexts in the bucket are all different, but the [begin, cursor) range |
53 bytes_by_context_[context] += size; | 181 // of the backtrace is equal for all of them, and the type names are the same |
54 } | 182 // if |is_broken_down_by_type_name| is set. |
55 | 183 DCHECK(!bucket.bytes_by_context.empty()); |
56 scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() { | 184 |
57 // Group by backtrace and by type ID, and compute the total heap size while | 185 const AllocationContext* context = bucket.bytes_by_context.front().first; |
58 // iterating anyway. | 186 |
59 size_t total_size = 0; | 187 const char* const* backtrace_begin = std::begin(context->backtrace.frames); |
60 hash_map<Backtrace, size_t> bytes_by_backtrace; | 188 const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor; |
61 hash_map<const char*, size_t> bytes_by_type; | 189 DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames)); |
62 | 190 |
63 for (auto context_size : bytes_by_context_) { | 191 Entry entry; |
64 total_size += context_size.second; | 192 entry.stack_frame_id = |
65 bytes_by_backtrace[context_size.first.backtrace] += context_size.second; | 193 stack_frame_deduplicator_->Insert(backtrace_begin, backtrace_end); |
66 bytes_by_type[context_size.first.type_name] += context_size.second; | 194 |
67 } | 195 // Deduplicate the type name, or use ID -1 if not broken down by type name. |
68 | 196 entry.type_id = bucket.is_broken_down_by_type_name |
69 auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace); | 197 ? type_name_deduplicator_->Insert(context->type_name) |
70 auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type); | 198 : -1; |
71 | 199 |
72 traced_value_->BeginArray("entries"); | 200 entry.size = bucket.size; |
73 | 201 |
74 // The global size, no column specified. | 202 auto position_and_inserted = entries_.insert(entry); |
75 { | 203 return position_and_inserted.second; |
76 traced_value_->BeginDictionary(); | 204 } |
77 WriteSize(total_size); | 205 |
78 traced_value_->EndDictionary(); | 206 void HeapDumpWriter::BreakDown(const Bucket& bucket) { |
79 } | 207 auto by_backtrace = BreakDownBy(bucket, BreakDownMode::kByBacktrace); |
80 | 208 auto by_type_name = BreakDownBy(bucket, BreakDownMode::kByTypeName); |
81 // Entries with the size per backtrace. | 209 |
82 for (const auto& entry : sorted_bytes_by_backtrace) { | 210 // Insert entries for the buckets. If a bucket was not present before, it has |
83 traced_value_->BeginDictionary(); | 211 // not been broken down before, so recursively continue breaking down in that |
84 // Insert a forward reference to the backtrace that will be written to the | 212 // case. There might be multiple routes to the same entry (first break down |
85 // |stackFrames| dictionary later on. | 213 // by type name, then by backtrace, or first by backtrace and then by type), |
86 int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames), | 214 // so a set is used to avoid dumping and breaking down entries more than once. |
87 std::end(entry.first.frames)); | 215 |
88 WriteStackFrameIndex(idx); | 216 for (const Bucket& subbucket : by_backtrace) |
89 WriteSize(entry.second); | 217 if (AddEntryForBucket(subbucket)) |
90 traced_value_->EndDictionary(); | 218 BreakDown(subbucket); |
91 } | 219 |
92 | 220 for (const Bucket& subbucket : by_type_name) |
93 // Entries with the size per type. | 221 if (AddEntryForBucket(subbucket)) |
94 for (const auto& entry : sorted_bytes_by_type) { | 222 BreakDown(subbucket); |
95 traced_value_->BeginDictionary(); | 223 } |
96 // Insert a forward reference to the type name that will be written to the | 224 |
97 // trace when it is flushed. | 225 const std::set<Entry>& HeapDumpWriter::Summarize( |
98 WriteTypeId(type_name_deduplicator_->Insert(entry.first)); | 226 const hash_map<AllocationContext, size_t>& bytes_by_context) { |
99 WriteSize(entry.second); | 227 // Start with one bucket that represents the entire heap. Iterate by |
100 traced_value_->EndDictionary(); | 228 // reference, because the buckets store pointers to the allocation contexts |
101 } | 229 // in |bytes_by_context|. |
102 | 230 Bucket root_bucket; |
103 traced_value_->EndArray(); // "entries" | 231 for (const auto& context_and_size : bytes_by_context) { |
104 | 232 const AllocationContext* context = &context_and_size.first; |
105 return traced_value_; | 233 const size_t size = context_and_size.second; |
106 } | 234 root_bucket.bytes_by_context.push_back(std::make_pair(context, size)); |
107 | 235 root_bucket.size += size; |
108 void HeapDumpWriter::WriteStackFrameIndex(int index) { | 236 } |
109 if (index == -1) { | 237 |
110 // An empty backtrace (which will have index -1) is represented by the empty | 238 AddEntryForBucket(root_bucket); |
111 // string, because there is no leaf frame to reference in |stackFrames|. | 239 |
112 traced_value_->SetString("bt", ""); | 240 // Recursively break down the heap and fill |entries_| with entries to dump. |
113 } else { | 241 BreakDown(root_bucket); |
114 // Format index of the leaf frame as a string, because |stackFrames| is a | 242 |
115 // dictionary, not an array. | 243 return entries_; |
116 SStringPrintf(&buffer_, "%i", index); | 244 } |
117 traced_value_->SetString("bt", buffer_); | 245 |
118 } | 246 scoped_refptr<TracedValue> Serialize(const std::set<Entry>& entries) { |
119 } | 247 std::string buffer; |
120 | 248 scoped_refptr<TracedValue> traced_value = new TracedValue; |
121 void HeapDumpWriter::WriteTypeId(int type_id) { | 249 |
122 // Format the type ID as a string. | 250 traced_value->BeginArray("entries"); |
123 SStringPrintf(&buffer_, "%i", type_id); | 251 |
124 traced_value_->SetString("type", buffer_); | 252 for (const Entry& entry : entries) { |
125 } | 253 traced_value->BeginDictionary(); |
126 | 254 |
127 void HeapDumpWriter::WriteSize(size_t size) { | 255 // Format size as hexadecimal string into |buffer|. |
128 // Format size as hexadecimal string into |buffer_|. | 256 SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size)); |
129 SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size)); | 257 traced_value->SetString("size", buffer); |
130 traced_value_->SetString("size", buffer_); | 258 |
| 259 if (entry.stack_frame_id == -1) { |
| 260 // An empty backtrace (which will have ID -1) is represented by the empty |
| 261 // string, because there is no leaf frame to reference in |stackFrames|. |
| 262 traced_value->SetString("bt", ""); |
| 263 } else { |
| 264 // Format index of the leaf frame as a string, because |stackFrames| is a |
| 265 // dictionary, not an array. |
| 266 SStringPrintf(&buffer, "%i", entry.stack_frame_id); |
| 267 traced_value->SetString("bt", buffer); |
| 268 } |
| 269 |
| 270 // Type ID -1 (cumulative size for all types) is represented by the absence |
| 271 // of the "type" key in the dictionary. |
| 272 if (entry.type_id != -1) { |
| 273 // Format the type ID as a string. |
| 274 SStringPrintf(&buffer, "%i", entry.type_id); |
| 275 traced_value->SetString("type", buffer); |
| 276 } |
| 277 |
| 278 traced_value->EndDictionary(); |
| 279 } |
| 280 |
| 281 traced_value->EndArray(); // "entries" |
| 282 return traced_value; |
| 283 } |
| 284 |
| 285 } // namespace internal |
| 286 |
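 | // Sketch of typical use (the variable names below are hypothetical): |
 | // |
 | //   scoped_refptr<TracedValue> heap_dump = ExportHeapDump( |
 | //       bytes_by_context, stack_frame_deduplicator, type_name_deduplicator); |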
| 287 scoped_refptr<TracedValue> ExportHeapDump( |
 | 288 const hash_map<AllocationContext, size_t>& bytes_by_context, |
| 289 StackFrameDeduplicator* stack_frame_deduplicator, |
| 290 TypeNameDeduplicator* type_name_deduplicator) { |
| 291 internal::HeapDumpWriter writer(stack_frame_deduplicator, |
| 292 type_name_deduplicator); |
 | 293 return Serialize(writer.Summarize(bytes_by_context)); |
131 } | 294 } |
132 | 295 |
133 } // namespace trace_event | 296 } // namespace trace_event |
134 } // namespace base | 297 } // namespace base |
OLD | NEW |