Chromium Code Reviews

Side by Side Diff: base/trace_event/heap_profiler_heap_dump_writer.cc

Issue 2650863003: [tracing] Switch to new heap dump format. (Closed)
Patch Set: Rebase. Created 3 years, 6 months ago.
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/trace_event/heap_profiler_heap_dump_writer.h"

#include <stdint.h>

#include <algorithm>
#include <iterator>
#include <set>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "base/format_macros.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/strings/stringprintf.h"
#include "base/trace_event/heap_profiler_serialization_state.h"
#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
#include "base/trace_event/heap_profiler_type_name_deduplicator.h"
#include "base/trace_event/trace_config.h"
#include "base/trace_event/trace_event_argument.h"
#include "base/trace_event/trace_log.h"

// Most of what the |HeapDumpWriter| does is aggregating detailed information
// about the heap and deciding what to dump. The input to this process is a
// list of |AllocationContext|s and size pairs.
//
// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
// pairs where the properties of the contexts share a prefix. (Type name is
// considered a list of length one here.) First all pairs are put into one
// bucket that represents the entire heap. Then this bucket is recursively
// broken down into smaller buckets. Each bucket keeps track of whether further
// breakdown is possible.

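// As a purely illustrative example (frame and type names below are
// hypothetical), suppose the input contains these (backtrace, type name,
// size) triples:
//
//   ([BrowserMain, CreateWidget], "Widget", 100)
//   ([BrowserMain, CreateWidget], "Label",   60)
//   ([BrowserMain, LoadImage],    "Image",  140)
//
// The root bucket covers all 300 bytes. Breaking it down by backtrace yields
// one bucket for BrowserMain (300 bytes, cursor advanced by one frame), which
// in turn breaks down into buckets for CreateWidget (160 bytes) and LoadImage
// (140 bytes). Breaking down by type name splits the CreateWidget bucket into
// "Widget" (100 bytes) and "Label" (60 bytes), and so on, until no further
// breakdown is possible or the resulting buckets fall below the dump
// threshold.
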
namespace base {
namespace trace_event {
namespace internal {
namespace {

// Denotes a property of |AllocationContext| to break down by.
enum class BreakDownMode { kByBacktrace, kByTypeName };

// A group of bytes for which the context shares a prefix.
struct Bucket {
  Bucket()
      : size(0),
        count(0),
        backtrace_cursor(0),
        is_broken_down_by_type_name(false) {}

  std::vector<std::pair<const AllocationContext*, AllocationMetrics>>
      metrics_by_context;

  // The sum of the sizes of |metrics_by_context|.
  size_t size;

  // The sum of number of allocations of |metrics_by_context|.
  size_t count;

  // The index of the first stack frame that has not yet been broken down by.
  // For all elements in this bucket, the stack frames 0 up to (but not
  // including) the cursor must be equal.
  size_t backtrace_cursor;

  // When true, the type name for all elements in this bucket must be equal.
  bool is_broken_down_by_type_name;
};

// Comparison operator to order buckets by their size.
bool operator<(const Bucket& lhs, const Bucket& rhs) {
  return lhs.size < rhs.size;
}

// Groups the allocations in the bucket by |break_by|. The buckets in the
// returned list will have |backtrace_cursor| advanced or
// |is_broken_down_by_type_name| set depending on the property to group by.
std::vector<Bucket> GetSubbuckets(const Bucket& bucket,
                                  BreakDownMode break_by) {
  std::unordered_map<const void*, Bucket> breakdown;

  if (break_by == BreakDownMode::kByBacktrace) {
    for (const auto& context_and_metrics : bucket.metrics_by_context) {
      const Backtrace& backtrace = context_and_metrics.first->backtrace;
      const StackFrame* begin = std::begin(backtrace.frames);
      const StackFrame* end = begin + backtrace.frame_count;
      const StackFrame* cursor = begin + bucket.backtrace_cursor;

      DCHECK_LE(cursor, end);

      if (cursor != end) {
        Bucket& subbucket = breakdown[cursor->value];
        subbucket.size += context_and_metrics.second.size;
        subbucket.count += context_and_metrics.second.count;
        subbucket.metrics_by_context.push_back(context_and_metrics);
        subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
        subbucket.is_broken_down_by_type_name =
            bucket.is_broken_down_by_type_name;
        DCHECK_GT(subbucket.size, 0u);
        DCHECK_GT(subbucket.count, 0u);
      }
    }
  } else if (break_by == BreakDownMode::kByTypeName) {
    if (!bucket.is_broken_down_by_type_name) {
      for (const auto& context_and_metrics : bucket.metrics_by_context) {
        const AllocationContext* context = context_and_metrics.first;
        Bucket& subbucket = breakdown[context->type_name];
        subbucket.size += context_and_metrics.second.size;
        subbucket.count += context_and_metrics.second.count;
        subbucket.metrics_by_context.push_back(context_and_metrics);
        subbucket.backtrace_cursor = bucket.backtrace_cursor;
        subbucket.is_broken_down_by_type_name = true;
        DCHECK_GT(subbucket.size, 0u);
        DCHECK_GT(subbucket.count, 0u);
      }
    }
  }

  std::vector<Bucket> buckets;
  buckets.reserve(breakdown.size());
  for (auto key_bucket : breakdown)
    buckets.push_back(key_bucket.second);

  return buckets;
}

// Breaks down the bucket by |break_by|. Returns only buckets that contribute
// more than |min_size_bytes| to the total size. The long tail is omitted.
std::vector<Bucket> BreakDownBy(const Bucket& bucket,
                                BreakDownMode break_by,
                                size_t min_size_bytes) {
  std::vector<Bucket> buckets = GetSubbuckets(bucket, break_by);

  // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
  // so its front contains the largest bucket. Buckets should be iterated
  // ordered by size, but sorting the vector is overkill because the long tail
  // of small buckets will be discarded. By using a max-heap, the optimal case
  // where all but the first bucket are discarded is O(n). The worst case where
  // no bucket is discarded is doing a heap sort, which is O(n log n).
  std::make_heap(buckets.begin(), buckets.end());

  // Keep including buckets until the next one would add fewer than
  // |min_size_bytes| bytes to the amount accounted for. The large buckets end
  // up in [it, end()); [begin(), it) is the part that still contains the
  // max-heap of small buckets.
  std::vector<Bucket>::iterator it;
  for (it = buckets.end(); it != buckets.begin(); --it) {
    if (buckets.front().size < min_size_bytes)
      break;

    // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
    // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
    std::pop_heap(buckets.begin(), it);
  }

  // At this point, |buckets| looks like this (numbers are bucket sizes):
  //
  // <-- max-heap of small buckets --->
  //                                  <-- large buckets by ascending size -->
  // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
  //   ^                                ^             ^
  //   |                                |             |
  //   begin()                          it         end()
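  //
  // For instance, with the sizes above and a threshold of, say, 80 bytes,
  // only the buckets of size 83, 89 and 97 are kept; the 57 bytes spread over
  // the small buckets are dropped from the breakdown.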

  // Discard the long tail of buckets that each contribute less than
  // |min_size_bytes| bytes.
  buckets.erase(buckets.begin(), it);

  return buckets;
}

}  // namespace

bool operator<(Entry lhs, Entry rhs) {
  // There is no need to compare |size|. If the backtrace and type name are
  // equal then the sizes must be equal as well.
  return std::tie(lhs.stack_frame_id, lhs.type_id) <
         std::tie(rhs.stack_frame_id, rhs.type_id);
}

HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
                               TypeNameDeduplicator* type_name_deduplicator,
                               uint32_t breakdown_threshold_bytes)
    : stack_frame_deduplicator_(stack_frame_deduplicator),
      type_name_deduplicator_(type_name_deduplicator),
      breakdown_threshold_bytes_(breakdown_threshold_bytes) {
}

HeapDumpWriter::~HeapDumpWriter() {}

bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
  // The contexts in the bucket are all different, but the [begin, cursor)
  // range is equal for all contexts in the bucket, and the type names are the
  // same if |is_broken_down_by_type_name| is set.
  DCHECK(!bucket.metrics_by_context.empty());

  const AllocationContext* context = bucket.metrics_by_context.front().first;

  const StackFrame* backtrace_begin = std::begin(context->backtrace.frames);
  const StackFrame* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
  DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

  Entry entry;
  entry.stack_frame_id = stack_frame_deduplicator_->Insert(
      backtrace_begin, backtrace_end);

  // Deduplicate the type name, or use ID -1 if type name is not set.
  entry.type_id = bucket.is_broken_down_by_type_name
                      ? type_name_deduplicator_->Insert(context->type_name)
                      : -1;

  entry.size = bucket.size;
  entry.count = bucket.count;

  auto position_and_inserted = entries_.insert(entry);
  return position_and_inserted.second;
}

void HeapDumpWriter::BreakDown(const Bucket& bucket) {
  auto by_backtrace = BreakDownBy(bucket,
                                  BreakDownMode::kByBacktrace,
                                  breakdown_threshold_bytes_);
  auto by_type_name = BreakDownBy(bucket,
                                  BreakDownMode::kByTypeName,
                                  breakdown_threshold_bytes_);

  // Insert entries for the buckets. If a bucket was not present before, it has
  // not been broken down before, so recursively continue breaking down in that
  // case. There might be multiple routes to the same entry (first break down
  // by type name, then by backtrace, or first by backtrace and then by type),
  // so a set is used to avoid dumping and breaking down entries more than
  // once.
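  // For example (names are hypothetical), the entry for the backtrace
  // [MessageLoop::Run, Widget::Paint] with type name "Widget" can be reached
  // both by breaking down by backtrace first and by type name first; the
  // |entries_| set ensures it is dumped and recursed into only once.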

  for (const Bucket& subbucket : by_backtrace)
    if (AddEntryForBucket(subbucket))
      BreakDown(subbucket);

  for (const Bucket& subbucket : by_type_name)
    if (AddEntryForBucket(subbucket))
      BreakDown(subbucket);
}

const std::set<Entry>& HeapDumpWriter::Summarize(
    const std::unordered_map<AllocationContext, AllocationMetrics>&
        metrics_by_context) {
  // Start with one bucket that represents the entire heap. Iterate by
  // reference, because the buckets store pointers to the allocation contexts
  // in |metrics_by_context|.
  Bucket root_bucket;
  for (const auto& context_and_metrics : metrics_by_context) {
    DCHECK_GT(context_and_metrics.second.size, 0u);
    DCHECK_GT(context_and_metrics.second.count, 0u);
    const AllocationContext* context = &context_and_metrics.first;
    root_bucket.metrics_by_context.push_back(
        std::make_pair(context, context_and_metrics.second));
    root_bucket.size += context_and_metrics.second.size;
    root_bucket.count += context_and_metrics.second.count;
  }

  AddEntryForBucket(root_bucket);

  // Recursively break down the heap and fill |entries_| with entries to dump.
  BreakDown(root_bucket);

  return entries_;
}

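// Writes |entries| to a new TracedValue as an "entries" array of
// dictionaries. A rough sketch of the output (values are made up; "size" and
// "count" are hexadecimal strings, and "bt" and "type" are keys into the
// dictionaries emitted by the stack frame and type name deduplicators):
//
//   "entries": [
//     { "size": "1f40", "count": "9a", "bt": "" },              // entire heap
//     { "size": "1240", "count": "62", "bt": "5" },             // by backtrace
//     { "size": "c80", "count": "2c", "bt": "5", "type": "3" }  // + type name
//   ]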
std::unique_ptr<TracedValue> Serialize(const std::set<Entry>& entries) {
  std::string buffer;
  std::unique_ptr<TracedValue> traced_value(new TracedValue);

  traced_value->BeginArray("entries");

  for (const Entry& entry : entries) {
    traced_value->BeginDictionary();

    // Format size as hexadecimal string into |buffer|.
    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
    traced_value->SetString("size", buffer);

    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.count));
    traced_value->SetString("count", buffer);

    if (entry.stack_frame_id == -1) {
      // An empty backtrace (which will have ID -1) is represented by the empty
      // string, because there is no leaf frame to reference in |stackFrames|.
      traced_value->SetString("bt", "");
    } else {
      // Format index of the leaf frame as a string, because |stackFrames| is a
      // dictionary, not an array.
      SStringPrintf(&buffer, "%i", entry.stack_frame_id);
      traced_value->SetString("bt", buffer);
    }

    // Type ID -1 (cumulative size for all types) is represented by the absence
    // of the "type" key in the dictionary.
    if (entry.type_id != -1) {
      // Format the type ID as a string.
      SStringPrintf(&buffer, "%i", entry.type_id);
      traced_value->SetString("type", buffer);
    }

    traced_value->EndDictionary();
  }

  traced_value->EndArray();  // "entries"
  return traced_value;
}

}  // namespace internal

std::unique_ptr<TracedValue> ExportHeapDump(
    const std::unordered_map<AllocationContext, AllocationMetrics>&
        metrics_by_context,
    const HeapProfilerSerializationState& heap_profiler_serialization_state) {
  internal::HeapDumpWriter writer(
      heap_profiler_serialization_state.stack_frame_deduplicator(),
      heap_profiler_serialization_state.type_name_deduplicator(),
      heap_profiler_serialization_state
          .heap_profiler_breakdown_threshold_bytes());
  return Serialize(writer.Summarize(metrics_by_context));
}

}  // namespace trace_event
}  // namespace base