Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/trace_event/heap_profiler_heap_dump_writer.h" | 5 #include "base/trace_event/heap_profiler_heap_dump_writer.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <iterator> | 8 #include <iterator> |
| 9 #include <utility> | 9 #include <tuple> |
| 10 #include <vector> | 10 #include <vector> |
| 11 | 11 |
| 12 #include "base/containers/hash_tables.h" | |
| 12 #include "base/format_macros.h" | 13 #include "base/format_macros.h" |
| 14 #include "base/logging.h" | |
| 15 #include "base/macros.h" | |
| 13 #include "base/strings/stringprintf.h" | 16 #include "base/strings/stringprintf.h" |
| 17 #include "base/trace_event/heap_profiler_allocation_context.h" | |
| 14 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" | 18 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" |
| 15 #include "base/trace_event/heap_profiler_type_name_deduplicator.h" | 19 #include "base/trace_event/heap_profiler_type_name_deduplicator.h" |
| 16 #include "base/trace_event/trace_event_argument.h" | 20 #include "base/trace_event/trace_event_argument.h" |
| 17 | 21 |
| 22 // Most of what this file does is aggregating detailed information about the | |
| 23 // heap and deciding what to dump. Input to this process is a list of | |
|
petrcermak
2015/12/09 15:02:31
nit: I think there should be "The" in front of "In
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
| |
| 24 // |AllocationContext|s and size pairs. | |
| 25 // | |
| 26 // The pairs are grouped into |Bucket|s. A bucket is a group of contexts with | |
| 27 // sizes of which the properties share a prefix. (Type name is considered a | |
|
petrcermak
2015/12/09 15:02:31
hnit: It sounds like you're talking about "propert
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
| |
| 28 // list of length one here.) First all pairs are put into one bucket that | |
| 29 // represents the entire heap. Then this bucket is recursively broken down into | |
| 30 // smaller buckets. Each bucket keeps track of whether further breakdown is | |
| 31 // possible. | |
| 32 | |
| 18 namespace base { | 33 namespace base { |
| 19 namespace trace_event { | 34 namespace trace_event { |
| 35 namespace heap_dump { | |
| 20 | 36 |
| 21 namespace { | 37 namespace { |
| 22 | 38 |
| 23 template <typename T> | 39 // Denotes a property of |AllocationContext|. |
| 24 bool PairSizeGt(const std::pair<T, size_t>& lhs, | 40 enum class BreakDownMode { kByBacktrace, kByTypeName }; |
| 25 const std::pair<T, size_t>& rhs) { | 41 |
| 26 return lhs.second > rhs.second; | 42 struct Bucket { |
| 27 } | 43 Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {} |
| 28 | 44 |
| 29 // Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is | 45 // List of context and number of bytes allocated for that context. |
| 30 // ordered from high |size_t| to low |size_t|. | 46 std::vector<std::pair<const AllocationContext*, size_t>> allocations; |
| 31 template <typename T> | 47 |
| 32 std::vector<std::pair<T, size_t>> SortBySizeDescending( | 48 // The sum of the sizes of the allocations. |
| 33 const hash_map<T, size_t>& grouped) { | 49 size_t size; |
| 34 std::vector<std::pair<T, size_t>> sorted; | 50 |
| 35 sorted.reserve(grouped.size()); | 51 // The index of the stack frame that has not yet been broken down by. For |
| 36 std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted)); | 52 // all contexts in this bucket, the stack frames 0 up to (but not including) |
| 37 std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>); | 53 // the cursor, must be equal. |
| 38 return sorted; | 54 size_t backtrace_cursor; |
| 55 | |
| 56 // When true, the type name for all contexts in this bucket must be equal. | |
| 57 bool is_broken_down_by_type_name; | |
| 58 }; | |
| 59 | |
| 60 // Comparison operator used to order buckets by size so that buckets of | |
| 61 // insignificant size might be discarded. | |
| 62 bool operator<(const Bucket& lhs, const Bucket& rhs) { | |
| 63 return lhs.size < rhs.size; | |
| 64 } | |
| 65 | |
| 66 // Groups the allocations in the bucket by |breakDownBy|. The buckets in the | |
| 67 // returned list will have |backtrace_cursor| advanced or | |
| 68 // |is_broken_down_by_type_name| set depending the property to group by. | |
|
petrcermak
2015/12/09 15:02:32
nit: "depending ON"
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
| |
| 69 std::vector<Bucket> BreakDownBy(const Bucket& bucket, | |
| 70 BreakDownMode breakDownBy) { | |
| 71 base::hash_map<const char*, Bucket> breakdown; | |
| 72 | |
| 73 if (breakDownBy == BreakDownMode::kByBacktrace) { | |
|
petrcermak
2015/12/09 15:02:32
Wouldn't a switch make more sense here?
Ruud van Asseldonk
2015/12/09 16:16:01
I don’t have a strong opinion. I find a switch to
| |
| 74 for (const auto& context_and_size : bucket.allocations) { | |
| 75 const AllocationContext* context = context_and_size.first; | |
|
petrcermak
2015/12/09 15:02:31
You always use |context| to get the backtrace so y
Ruud van Asseldonk
2015/12/09 16:16:01
Done.
| |
| 76 const char* const* begin = std::begin(context->backtrace.frames); | |
| 77 const char* const* end = std::end(context->backtrace.frames); | |
| 78 const char* const* cursor = begin + bucket.backtrace_cursor; | |
| 79 | |
| 80 // The backtrace in the context is padded with null pointers, but these | |
| 81 // should not be considered for breakdown. Adjust end to point past the | |
| 82 // last non-null frame. | |
| 83 while (begin != end && *(end - 1) == nullptr) | |
| 84 end--; | |
| 85 | |
| 86 DCHECK_LE(cursor, end); | |
| 87 | |
| 88 if (cursor != end) { | |
| 89 Bucket& subbucket = breakdown[*cursor]; | |
| 90 subbucket.size += context_and_size.second; | |
| 91 subbucket.allocations.push_back(context_and_size); | |
| 92 subbucket.backtrace_cursor = bucket.backtrace_cursor + 1; | |
| 93 subbucket.is_broken_down_by_type_name = | |
| 94 bucket.is_broken_down_by_type_name; | |
| 95 DCHECK_GT(subbucket.size, 0u); | |
| 96 } | |
| 97 } | |
| 98 } else if (breakDownBy == BreakDownMode::kByTypeName) { | |
| 99 if (!bucket.is_broken_down_by_type_name) { | |
| 100 for (const auto& context_and_size : bucket.allocations) { | |
| 101 Bucket& subbucket = breakdown[context_and_size.first->type_name]; | |
| 102 subbucket.size += context_and_size.second; | |
| 103 subbucket.allocations.push_back(context_and_size); | |
| 104 subbucket.backtrace_cursor = bucket.backtrace_cursor; | |
| 105 subbucket.is_broken_down_by_type_name = true; | |
| 106 DCHECK_GT(subbucket.size, 0u); | |
| 107 } | |
| 108 } | |
| 109 } | |
| 110 | |
| 111 std::vector<Bucket> buckets; | |
| 112 buckets.reserve(breakdown.size()); | |
| 113 for (auto key_bucket : breakdown) | |
| 114 buckets.push_back(key_bucket.second); | |
| 115 | |
| 116 return buckets; | |
| 117 } | |
| 118 | |
| 119 // Breaks down the bucket by |property|. Returns only buckets that contribute | |
| 120 // significantly to the total size. The long tail is omitted. | |
| 121 std::vector<Bucket> DiscardSmallBuckets(std::vector<Bucket> buckets) { | |
| 122 // Ensure that |buckets| is a max-heap (the data structure, not memory heap), | |
| 123 // so its front contains the largest bucket. Buckets should be iterated | |
| 124 // ordered by size, but sorting the vector is overkill because the long tail | |
| 125 // of small buckets will be discarded. By using a max-heap, the optimal case | |
| 126 // where all but the first bucket are discarded is O(n). The worst case where | |
| 127 // no bucket is discarded is doing a heap sort, which is O(n log n). | |
| 128 std::make_heap(buckets.begin(), buckets.end()); | |
| 129 | |
| 130 // Keep including allocations until adding one would increase the number of | |
| 131 // bytes accounted for by less than a percent. The large buckets end up in | |
| 132 // [it, end()), [begin(), it) is the part that contains the max-heap of small | |
| 133 // buckets. TODO(ruuda): tweak the heuristic. | |
| 134 size_t accounted_for = 0; | |
| 135 std::vector<Bucket>::iterator it; | |
| 136 for (it = buckets.end(); it != buckets.begin(); --it) { | |
| 137 // Compute contribution to number of bytes accounted for in percent, rounded | |
| 138 // down due to integer division. Buckets are iterated by descending size, | |
| 139 // so later buckets cannot have a larger contribution than this one. | |
| 140 accounted_for += buckets.front().size; | |
| 141 size_t contribution = buckets.front().size * 100 / accounted_for; | |
| 142 if (contribution == 0) | |
| 143 break; | |
| 144 | |
| 145 // Put the largest bucket in [begin, it) at |it - 1| and max-heapify | |
| 146 // [begin, it - 1). This puts the next largest bucket at |buckets.front()|. | |
| 147 std::pop_heap(buckets.begin(), it); | |
| 148 } | |
| 149 | |
| 150 // At this point, |buckets| looks like this (numbers are bucket sizes): | |
| 151 // | |
| 152 // <-- max-heap of small buckets ---> | |
| 153 // <-- large buckets by descending size --> | |
| 154 // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ] | |
| 155 // ^ ^ ^ | |
| 156 // | | | | |
| 157 // begin() it end() | |
| 158 | |
| 159 // Discard the long tail of buckets that contribute less than a percent. | |
| 160 buckets.erase(buckets.begin(), it); | |
| 161 | |
| 162 return buckets; | |
| 163 } | |
| 164 | |
| 165 // Inserts an entry for |bucket| into |entries|. | |
| 166 bool InsertEntry(const Bucket& bucket, | |
| 167 std::set<HeapEntry>* entries, | |
| 168 StackFrameDeduplicator* stack_frame_deduplicator, | |
| 169 TypeNameDeduplicator* type_name_deduplicator) { | |
| 170 DCHECK(!bucket.allocations.empty()); | |
| 171 | |
| 172 // The contexts in the bucket are all different, but for all properties, the | |
| 173 // [begin, cursor) range is equal for all contexts in the bucket. | |
|
petrcermak
2015/12/09 15:02:32
This comment is outdated - it doesn't capture the
Ruud van Asseldonk
2015/12/09 16:16:01
Fixed.
| |
| 174 const AllocationContext& context = *bucket.allocations.front().first; | |
| 175 | |
| 176 const char* const* backtrace_begin = std::begin(context.backtrace.frames); | |
| 177 const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor; | |
| 178 DCHECK_LE(bucket.backtrace_cursor, arraysize(context.backtrace.frames)); | |
| 179 | |
| 180 HeapEntry entry; | |
| 181 entry.stack_frame_id = | |
| 182 stack_frame_deduplicator->Insert(backtrace_begin, backtrace_end); | |
| 183 | |
| 184 // Deduplicate the type name, or use ID -1 if type name is not set. | |
| 185 entry.type_id = bucket.is_broken_down_by_type_name | |
| 186 ? type_name_deduplicator->Insert(context.type_name) | |
| 187 : -1; | |
| 188 | |
| 189 entry.size = bucket.size; | |
| 190 | |
| 191 auto position_and_inserted = entries->insert(entry); | |
| 192 return position_and_inserted.second; | |
| 193 } | |
| 194 | |
| 195 // Recursively breaks down |bucket| into smaller buckets, and adds the buckets | |
| 196 // of significant size to |entries|. | |
| 197 void BreakDown(const Bucket& bucket, | |
| 198 std::set<HeapEntry>* entries, | |
| 199 StackFrameDeduplicator* stack_frame_deduplicator, | |
| 200 TypeNameDeduplicator* type_name_deduplicator) { | |
| 201 std::vector<Bucket> by_backtrace = | |
| 202 DiscardSmallBuckets(BreakDownBy(bucket, BreakDownMode::kByBacktrace)); | |
| 203 std::vector<Bucket> by_type_name = | |
| 204 DiscardSmallBuckets(BreakDownBy(bucket, BreakDownMode::kByTypeName)); | |
| 205 | |
| 206 // Insert entries for the buckets. If a bucket was not present before, it has | |
| 207 // not been broken down before, so recursively continue breaking down in that | |
| 208 // case. There might be multiple routes to the same entry (first break down | |
| 209 // by type name, then by backtrace, or first by backtrace and then by type), | |
| 210 // so a set is used to avoid dumping and breaking down entries more than once. | |
| 211 | |
| 212 for (const Bucket& subbucket : by_backtrace) | |
| 213 if (InsertEntry(subbucket, entries, stack_frame_deduplicator, | |
| 214 type_name_deduplicator)) | |
| 215 BreakDown(subbucket, entries, stack_frame_deduplicator, | |
| 216 type_name_deduplicator); | |
| 217 | |
| 218 for (const Bucket& subbucket : by_type_name) | |
| 219 if (InsertEntry(subbucket, entries, stack_frame_deduplicator, | |
| 220 type_name_deduplicator)) | |
| 221 BreakDown(subbucket, entries, stack_frame_deduplicator, | |
| 222 type_name_deduplicator); | |
| 39 } | 223 } |
| 40 | 224 |
| 41 } // namespace | 225 } // namespace |
| 42 | 226 |
| 43 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, | 227 bool operator<(HeapEntry lhs, HeapEntry rhs) { |
| 44 TypeNameDeduplicator* type_name_deduplicator) | 228 // There is no need to compare |size|. If the backtrace and type name are |
| 45 : traced_value_(new TracedValue()), | 229 // equal then the sizes must be equal as well. |
| 46 stack_frame_deduplicator_(stack_frame_deduplicator), | 230 return std::tie(lhs.stack_frame_id, lhs.type_id) < |
| 47 type_name_deduplicator_(type_name_deduplicator) {} | 231 std::tie(rhs.stack_frame_id, rhs.type_id); |
| 48 | 232 } |
| 49 HeapDumpWriter::~HeapDumpWriter() {} | 233 |
| 50 | 234 std::set<HeapEntry> Dump( |
| 51 void HeapDumpWriter::InsertAllocation(const AllocationContext& context, | 235 const base::hash_map<AllocationContext, size_t>& allocations, |
| 52 size_t size) { | 236 StackFrameDeduplicator* stack_frame_deduplicator, |
| 53 bytes_by_context_[context] += size; | 237 TypeNameDeduplicator* type_name_deduplicator) { |
| 54 } | 238 // Start with one bucket that represents the entire heap. Iterate by |
| 55 | 239 // reference, because the pairs are going to point to allocation contexts |
| 56 scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() { | 240 // stored in |allocations|. |
| 57 // Group by backtrace and by type ID, and compute the total heap size while | 241 Bucket root_bucket; |
| 58 // iterating anyway. | 242 for (const auto& context_and_size : allocations) { |
| 59 size_t total_size = 0; | 243 const AllocationContext* context = &context_and_size.first; |
| 60 hash_map<Backtrace, size_t> bytes_by_backtrace; | 244 const size_t size = context_and_size.second; |
| 61 hash_map<const char*, size_t> bytes_by_type; | 245 root_bucket.allocations.push_back(std::make_pair(context, size)); |
| 62 | 246 root_bucket.size += size; |
| 63 for (auto context_size : bytes_by_context_) { | |
| 64 total_size += context_size.second; | |
| 65 bytes_by_backtrace[context_size.first.backtrace] += context_size.second; | |
| 66 bytes_by_type[context_size.first.type_name] += context_size.second; | |
| 67 } | 247 } |
| 68 | 248 |
| 69 auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace); | 249 std::set<HeapEntry> entries; |
| 70 auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type); | 250 InsertEntry(root_bucket, &entries, stack_frame_deduplicator, |
| 71 | 251 type_name_deduplicator); |
| 72 traced_value_->BeginArray("entries"); | 252 |
| 73 | 253 // Recursively break down the heap and fill |entries| with entries to dump. |
| 74 // The global size, no column specified. | 254 BreakDown(root_bucket, &entries, stack_frame_deduplicator, |
| 75 { | 255 type_name_deduplicator); |
| 76 traced_value_->BeginDictionary(); | 256 |
| 77 WriteSize(total_size); | 257 return entries; |
| 78 traced_value_->EndDictionary(); | 258 } |
| 259 | |
| 260 scoped_refptr<TracedValue> Write(const std::set<HeapEntry>& entries) { | |
| 261 std::string buffer; | |
| 262 scoped_refptr<TracedValue> traced_value = new TracedValue; | |
| 263 | |
| 264 traced_value->BeginArray("entries"); | |
| 265 | |
| 266 for (const HeapEntry& entry : entries) { | |
| 267 traced_value->BeginDictionary(); | |
| 268 | |
| 269 // Format size as hexadecimal string into |buffer|. | |
| 270 SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size)); | |
| 271 traced_value->SetString("size", buffer); | |
| 272 | |
| 273 if (entry.stack_frame_id == -1) { | |
| 274 // An empty backtrace (which will have ID -1) is represented by the empty | |
| 275 // string, because there is no leaf frame to reference in |stackFrames|. | |
| 276 traced_value->SetString("bt", ""); | |
| 277 } else { | |
| 278 // Format index of the leaf frame as a string, because |stackFrames| is a | |
| 279 // dictionary, not an array. | |
| 280 SStringPrintf(&buffer, "%i", entry.stack_frame_id); | |
| 281 traced_value->SetString("bt", buffer); | |
| 282 } | |
| 283 | |
| 284 // Type ID -1 (cumulative size for all types) is represented by the absence | |
| 285 // of the "type" key in the dictionary. | |
| 286 if (entry.type_id != -1) { | |
| 287 // Format the type ID as a string. | |
| 288 SStringPrintf(&buffer, "%i", entry.type_id); | |
| 289 traced_value->SetString("type", buffer); | |
| 290 } | |
| 291 | |
| 292 traced_value->EndDictionary(); | |
| 79 } | 293 } |
| 80 | 294 |
| 81 // Entries with the size per backtrace. | 295 traced_value->EndArray(); // "entries" |
| 82 for (const auto& entry : sorted_bytes_by_backtrace) { | 296 return traced_value; |
| 83 traced_value_->BeginDictionary(); | 297 } |
| 84 // Insert a forward reference to the backtrace that will be written to the | 298 |
| 85 // |stackFrames| dictionary later on. | 299 } // namespace heap_dump |
| 86 int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames), | 300 |
| 87 std::end(entry.first.frames)); | 301 scoped_refptr<TracedValue> WriteHeapDump( |
| 88 WriteStackFrameIndex(idx); | 302 const base::hash_map<AllocationContext, size_t>& allocations, |
| 89 WriteSize(entry.second); | 303 StackFrameDeduplicator* stack_frame_deduplicator, |
| 90 traced_value_->EndDictionary(); | 304 TypeNameDeduplicator* type_name_deduplicator) { |
| 91 } | 305 return heap_dump::Write(heap_dump::Dump(allocations, stack_frame_deduplicator, |
| 92 | 306 type_name_deduplicator)); |
| 93 // Entries with the size per type. | |
| 94 for (const auto& entry : sorted_bytes_by_type) { | |
| 95 traced_value_->BeginDictionary(); | |
| 96 // Insert a forward reference to the type name that will be written to the | |
| 97 // trace when it is flushed. | |
| 98 WriteTypeId(type_name_deduplicator_->Insert(entry.first)); | |
| 99 WriteSize(entry.second); | |
| 100 traced_value_->EndDictionary(); | |
| 101 } | |
| 102 | |
| 103 traced_value_->EndArray(); // "entries" | |
| 104 | |
| 105 return traced_value_; | |
| 106 } | |
| 107 | |
| 108 void HeapDumpWriter::WriteStackFrameIndex(int index) { | |
| 109 if (index == -1) { | |
| 110 // An empty backtrace (which will have index -1) is represented by the empty | |
| 111 // string, because there is no leaf frame to reference in |stackFrames|. | |
| 112 traced_value_->SetString("bt", ""); | |
| 113 } else { | |
| 114 // Format index of the leaf frame as a string, because |stackFrames| is a | |
| 115 // dictionary, not an array. | |
| 116 SStringPrintf(&buffer_, "%i", index); | |
| 117 traced_value_->SetString("bt", buffer_); | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 void HeapDumpWriter::WriteTypeId(int type_id) { | |
| 122 // Format the type ID as a string. | |
| 123 SStringPrintf(&buffer_, "%i", type_id); | |
| 124 traced_value_->SetString("type", buffer_); | |
| 125 } | |
| 126 | |
| 127 void HeapDumpWriter::WriteSize(size_t size) { | |
| 128 // Format size as hexadecimal string into |buffer_|. | |
| 129 SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size)); | |
| 130 traced_value_->SetString("size", buffer_); | |
| 131 } | 307 } |
| 132 | 308 |
| 133 } // namespace trace_event | 309 } // namespace trace_event |
| 134 } // namespace base | 310 } // namespace base |
| OLD | NEW |