Chromium Code Reviews

Side by Side Diff: base/trace_event/heap_profiler_heap_dump_writer.cc

Issue 1494293005: [Tracing] Enable heap dumps with both type info and backtraces (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@sfdedup-iter
Patch Set: IWYU Created 5 years ago
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/trace_event/heap_profiler_heap_dump_writer.h" 5 #include "base/trace_event/heap_profiler_heap_dump_writer.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 #include <set>
Primiano Tucci (use gerrit) 2015/12/09 17:04:00 remove, you have this in the .h
Ruud van Asseldonk 2015/12/09 20:58:04 Done.
10 #include <tuple>
Primiano Tucci (use gerrit) 2015/12/09 17:03:59 I think this is not needed anymore
Ruud van Asseldonk 2015/12/09 20:58:04 It is required for |std::tie|, which is required for the |Entry| comparison operator below.
9 #include <utility> 11 #include <utility>
10 #include <vector> 12 #include <vector>
11 13
14 #include "base/containers/hash_tables.h"
12 #include "base/format_macros.h" 15 #include "base/format_macros.h"
16 #include "base/logging.h"
17 #include "base/macros.h"
Primiano Tucci (use gerrit) 2015/12/09 17:03:59 remove, you have this in the .h
Ruud van Asseldonk 2015/12/09 20:58:05 Done.
13 #include "base/strings/stringprintf.h" 18 #include "base/strings/stringprintf.h"
19 #include "base/trace_event/heap_profiler_allocation_context.h"
14 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h" 20 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
15 #include "base/trace_event/heap_profiler_type_name_deduplicator.h" 21 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"
16 #include "base/trace_event/trace_event_argument.h" 22 #include "base/trace_event/trace_event_argument.h"
17 23
24 // Most of what the |HeapDumpWriter| does is aggregating detailed information
25 // about the heap and deciding what to dump. The input to this process is a list
26 // of (|AllocationContext|, size) pairs.
27 //
28 // The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
29 // pairs where the properties of the contexts share a prefix. (Type name is
30 // considered a list of length one here.) First all pairs are put into one
31 // bucket that represents the entire heap. Then this bucket is recursively
32 // broken down into smaller buckets. Each bucket keeps track of whether further
33 // breakdown is possible.
34
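As an illustration of the bucketing described above, here is a minimal standalone sketch with simplified types and made-up data (this is not the |HeapDumpWriter| code itself): backtraces that agree on every frame before the cursor fall into the same bucket, keyed by the frame at the cursor.

  // Minimal sketch of the prefix-grouping idea with simplified types.
  // The frame names and sizes below are made up for illustration.
  #include <cstddef>
  #include <map>
  #include <string>
  #include <utility>
  #include <vector>

  using Frames = std::vector<std::string>;

  int main() {
    // (backtrace, size) pairs; the real code uses |AllocationContext|.
    std::vector<std::pair<Frames, size_t>> allocations = {
        {{"main", "MessageLoop::Run", "Foo"}, 400},
        {{"main", "MessageLoop::Run", "Bar"}, 300},
        {{"main", "RenderThread::Init"}, 100},
    };

    // Break the root bucket (cursor = 0) down by the first stack frame.
    // All three allocations share the frame "main", so they end up in a
    // single bucket of 800 bytes. Breaking that bucket down again with
    // cursor = 1 would split it into "MessageLoop::Run" (700 bytes) and
    // "RenderThread::Init" (100 bytes).
    size_t cursor = 0;
    std::map<std::string, size_t> bucket_sizes;
    for (const auto& allocation : allocations)
      if (cursor < allocation.first.size())
        bucket_sizes[allocation.first[cursor]] += allocation.second;

    return 0;  // bucket_sizes now contains {"main": 800}.
  }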
18 namespace base { 35 namespace base {
19 namespace trace_event { 36 namespace trace_event {
20 37 namespace internal {
21 namespace { 38 namespace {
22 39
23 template <typename T> 40 // Denotes a property of |AllocationContext| to break down by.
24 bool PairSizeGt(const std::pair<T, size_t>& lhs, 41 enum class BreakDownMode { kByBacktrace, kByTypeName };
25 const std::pair<T, size_t>& rhs) { 42
26 return lhs.second > rhs.second; 43 // A group of bytes for which the context shares a prefix.
27 } 44 struct BASE_EXPORT Bucket {
Primiano Tucci (use gerrit) 2015/12/09 17:03:59 don't think this needs BASE_EXPORT
Ruud van Asseldonk 2015/12/09 20:58:05 You are right. Fixed.
28 45 Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {}
29 // Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is 46
30 // ordered from high |size_t| to low |size_t|. 47 std::vector<std::pair<const AllocationContext*, size_t>> bytes_by_context;
31 template <typename T> 48
32 std::vector<std::pair<T, size_t>> SortBySizeDescending( 49 // The sum of the sizes of |bytes_by_context|.
33 const hash_map<T, size_t>& grouped) { 50 size_t size;
34 std::vector<std::pair<T, size_t>> sorted; 51
35 sorted.reserve(grouped.size()); 52 // The index of the stack frame that has not yet been broken down by. For all
36 std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted)); 53 // elements in this bucket, the stack frames 0 up to (but not including) the
37 std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>); 54 // cursor must be equal.
38 return sorted; 55 size_t backtrace_cursor;
56
57 // When true, the type name for all elements in this bucket must be equal.
58 bool is_broken_down_by_type_name;
59 };
60
61 // Comparison operator to order buckets by their size.
62 bool operator<(const Bucket& lhs, const Bucket& rhs) {
63 return lhs.size < rhs.size;
64 }
65
66 // Groups the allocations in the bucket by |breakBy|. The buckets in the
67 // returned list will have |backtrace_cursor| advanced or
68 // |is_broken_down_by_type_name| set depending on the property to group by.
69 std::vector<Bucket> GetSubbuckets(const Bucket& bucket, BreakDownMode breakBy) {
70 base::hash_map<const char*, Bucket> breakdown;
71
72 if (breakBy == BreakDownMode::kByBacktrace) {
73 for (const auto& context_and_size : bucket.bytes_by_context) {
74 const Backtrace& backtrace = context_and_size.first->backtrace;
75 const char* const* begin = std::begin(backtrace.frames);
76 const char* const* end = std::end(backtrace.frames);
77 const char* const* cursor = begin + bucket.backtrace_cursor;
78
79 // The backtrace in the context is padded with null pointers, but these
80 // should not be considered for breakdown. Adjust end to point past the
81 // last non-null frame.
82 while (begin != end && *(end - 1) == nullptr)
83 end--;
84
85 DCHECK_LE(cursor, end);
86
87 if (cursor != end) {
88 Bucket& subbucket = breakdown[*cursor];
89 subbucket.size += context_and_size.second;
90 subbucket.bytes_by_context.push_back(context_and_size);
91 subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
92 subbucket.is_broken_down_by_type_name =
93 bucket.is_broken_down_by_type_name;
94 DCHECK_GT(subbucket.size, 0u);
95 }
96 }
97 } else if (breakBy == BreakDownMode::kByTypeName) {
98 if (!bucket.is_broken_down_by_type_name) {
99 for (const auto& context_and_size : bucket.bytes_by_context) {
100 const AllocationContext* context = context_and_size.first;
101 Bucket& subbucket = breakdown[context->type_name];
102 subbucket.size += context_and_size.second;
103 subbucket.bytes_by_context.push_back(context_and_size);
104 subbucket.backtrace_cursor = bucket.backtrace_cursor;
105 subbucket.is_broken_down_by_type_name = true;
106 DCHECK_GT(subbucket.size, 0u);
107 }
108 }
109 }
110
111 std::vector<Bucket> buckets;
112 buckets.reserve(breakdown.size());
113 for (auto key_bucket : breakdown)
114 buckets.push_back(key_bucket.second);
115
116 return buckets;
117 }
118
119 // Breaks down the bucket by |breakBy|. Returns only buckets that contribute
120 // significantly to the total size. The long tail is omitted.
121 std::vector<Bucket> BreakDownBy(const Bucket& bucket, BreakDownMode breakBy) {
122 std::vector<Bucket> buckets = GetSubbuckets(bucket, breakBy);
123
124 // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
125 // so its front contains the largest bucket. Buckets should be iterated
126 // ordered by size, but sorting the vector is overkill because the long tail
127 // of small buckets will be discarded. By using a max-heap, the optimal case
128 // where all but the first bucket are discarded is O(n). The worst case where
129 // no bucket is discarded is doing a heap sort, which is O(n log n).
130 std::make_heap(buckets.begin(), buckets.end());
131
132 // Keep including buckets until adding one would increase the number of
133 // bytes accounted for by less than a percent. The large buckets end up in
134 // [it, end()); [begin(), it) is the part that contains the max-heap of small
135 // buckets. TODO(ruuda): tweak the heuristic.
136 size_t accounted_for = 0;
137 std::vector<Bucket>::iterator it;
138 for (it = buckets.end(); it != buckets.begin(); --it) {
139 // Compute contribution to number of bytes accounted for in percent, rounded
140 // down due to integer division. Buckets are iterated by descending size,
141 // so later buckets cannot have a larger contribution than this one.
142 accounted_for += buckets.front().size;
143 size_t contribution = buckets.front().size * 100 / accounted_for;
144 if (contribution == 0)
145 break;
146
147 // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
148 // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
149 std::pop_heap(buckets.begin(), it);
150 }
151
152 // At this point, |buckets| looks like this (numbers are bucket sizes):
153 //
154 // <-- max-heap of small buckets --->
155 // <-- large buckets by ascending size -->
156 // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
157 // ^ ^ ^
158 // | | |
159 // begin() it end()
160
161 // Discard the long tail of buckets that contribute less than a percent.
162 buckets.erase(buckets.begin(), it);
Primiano Tucci (use gerrit) 2015/12/09 17:04:00 I wonder whether resize() here is more efficient o
Ruud van Asseldonk 2015/12/09 20:58:04 Resize discards elements at the end; I want to discard elements at the beginning.
163
164 return buckets;
39 } 165 }
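To see the make_heap/pop_heap trick above in isolation, here is a minimal sketch using plain sizes instead of |Bucket|s (the numbers are made up): each pop_heap moves the current maximum to the back of the shrinking heap range, so the large elements accumulate at the end in ascending order and the loop can stop as soon as the next largest element contributes less than a percent.

  // Minimal sketch of partial selection with a max-heap; illustrative only.
  #include <algorithm>
  #include <cstddef>
  #include <vector>

  int main() {
    std::vector<size_t> sizes = {500, 300, 1, 2, 1, 200, 3, 2, 1};
    std::make_heap(sizes.begin(), sizes.end());  // sizes.front() == 500.

    size_t accounted_for = 0;
    auto it = sizes.end();
    for (; it != sizes.begin(); --it) {
      accounted_for += sizes.front();
      // Stop once the largest remaining element contributes less than 1%.
      if (sizes.front() * 100 / accounted_for == 0)
        break;
      // Move the maximum of [begin, it) to it - 1 and re-heapify the rest.
      std::pop_heap(sizes.begin(), it);
    }

    // [it, end()) now holds {200, 300, 500}; the small tail in
    // [begin(), it) is discarded, just like the long tail of buckets.
    sizes.erase(sizes.begin(), it);
    return 0;
  }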
40 166
41 } // namespace 167 } // namespace
42 168
169 bool operator<(Entry lhs, Entry rhs) {
170 // There is no need to compare |size|. If the backtrace and type name are
171 // equal then the sizes must be equal as well.
172 return std::tie(lhs.stack_frame_id, lhs.type_id) <
173 std::tie(rhs.stack_frame_id, rhs.type_id);
174 }
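A brief note on the |std::tie| comparison above (and the reason <tuple> is still included): std::tie builds tuples of references that compare lexicographically, so entries are ordered by stack frame ID first and by type ID to break ties. A self-contained sketch of the same pattern, using a made-up struct rather than |Entry|:

  // Sketch of lexicographic ordering via std::tie; illustrative only.
  #include <cassert>
  #include <tuple>

  struct Pair { int first_key; int second_key; };

  bool Less(const Pair& lhs, const Pair& rhs) {
    return std::tie(lhs.first_key, lhs.second_key) <
           std::tie(rhs.first_key, rhs.second_key);
  }

  int main() {
    assert(Less({1, 9}, {2, 0}));  // first_key decides.
    assert(Less({1, 1}, {1, 2}));  // tie on first_key, second_key decides.
    return 0;
  }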
175
43 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator, 176 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
44 TypeNameDeduplicator* type_name_deduplicator) 177 TypeNameDeduplicator* type_name_deduplicator)
45 : traced_value_(new TracedValue()), 178 : stack_frame_deduplicator_(stack_frame_deduplicator),
46 stack_frame_deduplicator_(stack_frame_deduplicator),
47 type_name_deduplicator_(type_name_deduplicator) {} 179 type_name_deduplicator_(type_name_deduplicator) {}
48 180
49 HeapDumpWriter::~HeapDumpWriter() {} 181 HeapDumpWriter::~HeapDumpWriter() {}
50 182
51 void HeapDumpWriter::InsertAllocation(const AllocationContext& context, 183 bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
52 size_t size) { 184 // The contexts in the bucket are all different, but the [begin, cursor) range
53 bytes_by_context_[context] += size; 185 // is equal for all contexts in the bucket, and the type names are the same if
54 } 186 // |is_broken_down_by_type_name| is set.
55 187 DCHECK(!bucket.bytes_by_context.empty());
56 scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() { 188
57 // Group by backtrace and by type ID, and compute the total heap size while 189 const AllocationContext* context = bucket.bytes_by_context.front().first;
58 // iterating anyway. 190
59 size_t total_size = 0; 191 const char* const* backtrace_begin = std::begin(context->backtrace.frames);
60 hash_map<Backtrace, size_t> bytes_by_backtrace; 192 const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
61 hash_map<const char*, size_t> bytes_by_type; 193 DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));
62 194
63 for (auto context_size : bytes_by_context_) { 195 Entry entry;
64 total_size += context_size.second; 196 entry.stack_frame_id =
65 bytes_by_backtrace[context_size.first.backtrace] += context_size.second; 197 stack_frame_deduplicator_->Insert(backtrace_begin, backtrace_end);
66 bytes_by_type[context_size.first.type_name] += context_size.second; 198
67 } 199 // Deduplicate the type name, or use ID -1 if type name is not set.
68 200 entry.type_id = bucket.is_broken_down_by_type_name
69 auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace); 201 ? type_name_deduplicator_->Insert(context->type_name)
70 auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type); 202 : -1;
71 203
72 traced_value_->BeginArray("entries"); 204 entry.size = bucket.size;
73 205
74 // The global size, no column specified. 206 auto position_and_inserted = entries_.insert(entry);
75 { 207 return position_and_inserted.second;
76 traced_value_->BeginDictionary(); 208 }
77 WriteSize(total_size); 209
78 traced_value_->EndDictionary(); 210 void HeapDumpWriter::BreakDown(const Bucket& bucket) {
79 } 211 auto by_backtrace = BreakDownBy(bucket, BreakDownMode::kByBacktrace);
80 212 auto by_type_name = BreakDownBy(bucket, BreakDownMode::kByTypeName);
81 // Entries with the size per backtrace. 213
82 for (const auto& entry : sorted_bytes_by_backtrace) { 214 // Insert entries for the buckets. If a bucket was not present before, it has
83 traced_value_->BeginDictionary(); 215 // not been broken down before, so recursively continue breaking down in that
84 // Insert a forward reference to the backtrace that will be written to the 216 // case. There might be multiple routes to the same entry (first break down
85 // |stackFrames| dictionary later on. 217 // by type name, then by backtrace, or first by backtrace and then by type),
86 int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames), 218 // so a set is used to avoid dumping and breaking down entries more than once.
87 std::end(entry.first.frames)); 219
88 WriteStackFrameIndex(idx); 220 for (const Bucket& subbucket : by_backtrace)
89 WriteSize(entry.second); 221 if (AddEntryForBucket(subbucket))
90 traced_value_->EndDictionary(); 222 BreakDown(subbucket);
Ruud van Asseldonk 2015/12/09 16:16:01 I prefer to keep the call to |BreakDown| here inst
Primiano Tucci (use gerrit) 2015/12/09 17:04:00 Acknowledged.
91 } 223
92 224 for (const Bucket& subbucket : by_type_name)
93 // Entries with the size per type. 225 if (AddEntryForBucket(subbucket))
94 for (const auto& entry : sorted_bytes_by_type) { 226 BreakDown(subbucket);
95 traced_value_->BeginDictionary(); 227 }
96 // Insert a forward reference to the type name that will be written to the 228
97 // trace when it is flushed. 229 const std::set<Entry>& HeapDumpWriter::Dump(
Primiano Tucci (use gerrit) 2015/12/09 17:04:00 I wonder if we can get a better name here. I think
98 WriteTypeId(type_name_deduplicator_->Insert(entry.first)); 230 const hash_map<AllocationContext, size_t>& bytes_by_context) {
99 WriteSize(entry.second); 231 // Start with one bucket that represents the entire heap. Iterate by
100 traced_value_->EndDictionary(); 232 // reference, because the buckets are going to store pointers to the allocation
101 } 233 // contexts stored in |bytes_by_context|.
102 234 Bucket root_bucket;
103 traced_value_->EndArray(); // "entries" 235 for (const auto& context_and_size : bytes_by_context) {
104 236 const AllocationContext* context = &context_and_size.first;
105 return traced_value_; 237 const size_t size = context_and_size.second;
106 } 238 root_bucket.bytes_by_context.push_back(std::make_pair(context, size));
107 239 root_bucket.size += size;
108 void HeapDumpWriter::WriteStackFrameIndex(int index) { 240 }
109 if (index == -1) { 241
110 // An empty backtrace (which will have index -1) is represented by the empty 242 AddEntryForBucket(root_bucket);
111 // string, because there is no leaf frame to reference in |stackFrames|. 243
112 traced_value_->SetString("bt", ""); 244 // Recursively break down the heap and fill |entries_| with entries to dump.
113 } else { 245 BreakDown(root_bucket);
114 // Format index of the leaf frame as a string, because |stackFrames| is a 246
115 // dictionary, not an array. 247 return entries_;
116 SStringPrintf(&buffer_, "%i", index); 248 }
117 traced_value_->SetString("bt", buffer_); 249
118 } 250 scoped_refptr<TracedValue> Write(const std::set<Entry>& entries) {
119 } 251 std::string buffer;
120 252 scoped_refptr<TracedValue> traced_value = new TracedValue;
121 void HeapDumpWriter::WriteTypeId(int type_id) { 253
122 // Format the type ID as a string. 254 traced_value->BeginArray("entries");
123 SStringPrintf(&buffer_, "%i", type_id); 255
124 traced_value_->SetString("type", buffer_); 256 for (const Entry& entry : entries) {
125 } 257 traced_value->BeginDictionary();
126 258
127 void HeapDumpWriter::WriteSize(size_t size) { 259 // Format size as hexadecimal string into |buffer|.
128 // Format size as hexadecimal string into |buffer_|. 260 SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
129 SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size)); 261 traced_value->SetString("size", buffer);
130 traced_value_->SetString("size", buffer_); 262
131 } 263 if (entry.stack_frame_id == -1) {
264 // An empty backtrace (which will have ID -1) is represented by the empty
265 // string, because there is no leaf frame to reference in |stackFrames|.
266 traced_value->SetString("bt", "");
267 } else {
268 // Format index of the leaf frame as a string, because |stackFrames| is a
269 // dictionary, not an array.
270 SStringPrintf(&buffer, "%i", entry.stack_frame_id);
271 traced_value->SetString("bt", buffer);
272 }
273
274 // Type ID -1 (cumulative size for all types) is represented by the absence
275 // of the "type" key in the dictionary.
276 if (entry.type_id != -1) {
277 // Format the type ID as a string.
278 SStringPrintf(&buffer, "%i", entry.type_id);
279 traced_value->SetString("type", buffer);
280 }
281
282 traced_value->EndDictionary();
283 }
284
285 traced_value->EndArray(); // "entries"
286 return traced_value;
287 }
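Based on the Write() code above, the resulting "entries" array looks roughly like the following. All values here are illustrative: sizes are hexadecimal strings, "bt" and "type" are string-formatted IDs into the stack frame and type name dictionaries, an empty "bt" stands for the empty backtrace, and a missing "type" key means the cumulative size over all types.

  "entries": [
    { "size": "320", "bt": "" },               // total heap size, all types
    { "size": "2bc", "bt": "5" },              // one backtrace, all types
    { "size": "1f4", "bt": "5", "type": "3" }  // same backtrace, one type
  ]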
288
289 } // namespace internal
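Putting the pieces together, a rough usage sketch based only on the signatures visible in this patch; the real call site lives elsewhere and the variable names below are made up:

  // Hypothetical call site; the deduplicators and |bytes_by_context| are
  // assumed to exist in the surrounding code.
  internal::HeapDumpWriter writer(stack_frame_deduplicator,
                                  type_name_deduplicator);
  const std::set<internal::Entry>& entries = writer.Dump(bytes_by_context);
  scoped_refptr<TracedValue> heap_dump = internal::Write(entries);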
132 290
133 } // namespace trace_event 291 } // namespace trace_event
134 } // namespace base 292 } // namespace base
