Chromium Code Reviews

Unified Diff: base/trace_event/heap_profiler_heap_dump_writer.cc

Issue 1494293005: [Tracing] Enable heap dumps with both type info and backtraces (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@sfdedup-iter
Patch Set: "32-bit is still a thing", created 5 years ago
 // Copyright 2015 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include "base/trace_event/heap_profiler_heap_dump_writer.h"
 
 #include <algorithm>
 #include <iterator>
-#include <utility>
+#include <tuple>
 #include <vector>
 
+#include "base/containers/hash_tables.h"
 #include "base/format_macros.h"
+#include "base/logging.h"
 #include "base/strings/stringprintf.h"
+#include "base/trace_event/heap_profiler_allocation_context.h"
 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"
 #include "base/trace_event/trace_event_argument.h"
 
+// Most of what the |HeapDumpWriter| does is aggregating the detailed
petrcermak 2015/12/04 19:13:20 nit: I don't think there should be a definite article here.
Ruud van Asseldonk 2015/12/07 13:53:59 As you wish.
+// information about the heap and deciding what to dump. Input to this
+// process is a list of (AllocationContext, size). The context has several
petrcermak 2015/12/04 19:13:20 supernit: s/several/two/ (I'd expect "several" to be more than two.)
petrcermak 2015/12/04 19:13:20 nit: I'd say "tuple" before the full stop.
Ruud van Asseldonk 2015/12/07 13:53:59 Yes, only two is a bit of an anticlimax here after "several".
+// properties:
+//
+// * A backtrace (list of strings).
+// * A type name (string).
+//
+// For uniformity, type name is also treated as a list (with one element). A
+// list is a path down a tree. For backtrace, this is the call tree. The tree
+// for type name has height one. There is one tree for every property, but
+// nodes of the trees can be shared among the trees. For instance, starting
+// from the root node, following the "BrowserMain" edge breaking down by
+// backtrace, and then following the "T" edge breaking down by type name, will
+// yield the same node as following the "T" edge from the root, and then
+// following the "BrowserMain" edge breaking down by backtrace. The size of
+// this node is the number of bytes allocated for type "T" when "BrowserMain"
+// was at the bottom of the call stack.
+//
+// The (AllocationContext, size) pairs are converted into |Part|s and grouped
+// into |Bucket|s. Parts are mutually exclusive parts of the heap. A bucket
+// is a group of parts of which the properties share a prefix.
petrcermak 2015/12/04 19:13:20 This is quite cryptic. Could you please provide an example?
petrcermak 2015/12/04 19:13:20 nit: s/of which the/whose/
Ruud van Asseldonk 2015/12/07 13:53:59 I tried an example, but even a very minimal example got too long to be helpful.
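For illustration, a minimal worked example (not part of the CL; the allocation data is made up). Given two allocations that share a backtrace:

  (backtrace = [BrowserMain, CreateWidget], type = "T", size = 10)
  (backtrace = [BrowserMain, CreateWidget], type = "U", size = 20)

breaking the root down by backtrace yields a "BrowserMain" node of size 30, and breaking that node down by type yields "T" (10) and "U" (20). Taking the edges in the other order, root, then the "T" edge by type, then the "BrowserMain" edge by backtrace, reaches the same node of size 10.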
+
 namespace base {
 namespace trace_event {
 
+using Entry = HeapDumpWriter::Entry;
+
 namespace {
 
-template <typename T>
-bool PairSizeGt(const std::pair<T, size_t>& lhs,
-                const std::pair<T, size_t>& rhs) {
-  return lhs.second > rhs.second;
+// References an array of |const char*|s in two ranges: [begin, cursor), and
+// [cursor, end).
+struct Slice {
+  // Initializes the slice with the cursor at the first element.
+  Slice(const char* const* begin, const char* const* end)
+      : begin(begin), cursor(begin), end(end) {}
+
+  Slice(const char* const* begin,
+        const char* const* cursor,
+        const char* const* end)
+      : begin(begin), cursor(cursor), end(end) {}
+
+  // Returns a slice with cursor advanced by one.
+  Slice next() const { return Slice(begin, cursor + 1, end); }
+
+  // Pointer into const array (array cannot be modified through this struct) of
+  // |const char*| (immutable string). |begin| points at the first element,
+  // |end| points past the last element.
+  const char* const* begin;
+  const char* const* cursor;
+  const char* const* end;
+};
+
+struct Part;
+
+// Pointer to member of |Part| that has type |Slice|.
petrcermak 2015/12/04 19:13:20 nit: "a member"
Ruud van Asseldonk 2015/12/07 13:53:59 Done.
+using Property = Slice Part::*;
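Pointer-to-data-member syntax is rare enough to deserve a standalone sketch; the names below are hypothetical, only the mechanism matches the CL:

  #include <iostream>

  struct Point { int x; int y; };
  using Coord = int Point::*;  // pointer to an |int| member of |Point|

  // Reads either p.x or p.y depending on which member |c| points at,
  // analogous to |part.*groupBy| further down.
  int Get(const Point& p, Coord c) { return p.*c; }

  int main() {
    Point p = {3, 4};
    std::cout << Get(p, &Point::x) << "," << Get(p, &Point::y) << "\n";  // 3,4
  }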
+
+// Helper for breaking down the heap. The slices reference an allocation
petrcermak 2015/12/04 19:13:20 Do multiple slices correspond to one allocation context?
Ruud van Asseldonk 2015/12/07 13:53:59 Done.
+// context via the |begin| and |end| pointers. |size| is the number of bytes
+// allocated for this context. The |cursor| of the slices indicates the current
petrcermak 2015/12/04 19:13:20 If the previous comment applies, go to page 200 ..
Ruud van Asseldonk 2015/12/07 13:53:59 Done.
+// position in the tree when breaking down. The branches taken are [begin,
petrcermak 2015/12/04 19:13:20 "branches taken" is difficult to understand. What is meant by that?
Ruud van Asseldonk 2015/12/07 13:53:59 Done.
+// cursor). If |cursor| is not equal to |end|, it means that further breakdown
+// by that property is possible.
+struct Part {
+  Part(Slice backtrace, Slice type_name, size_t size)
+      : backtrace(backtrace), type_name(type_name), size(size) {}
+
+  // Constructs a part referencing an |AllocationContext|. Cursors are set to
+  // the beginning.
+  Part(const AllocationContext& ctx, size_t size)
+      : backtrace(std::begin(ctx.backtrace.frames),
+                  std::end(ctx.backtrace.frames)),
+        type_name(&ctx.type_name, &ctx.type_name + 1),
+        size(size) {
+    // The backtrace of the allocation context is padded with null pointers,
+    // but for breaking down the heap this padding should not be taken into
+    // account, so adjust the end pointer to point past the last non-null frame.
+    while (backtrace.begin != backtrace.end && *(backtrace.end - 1) == nullptr)
+      backtrace.end--;
+  }
+
+  // Returns a copy of this part, with the cursor of the member pointed at
+  // by |property| advanced by one.
+  Part advance(Property property) const {
+    return Part(property == &Part::backtrace ? backtrace.next() : backtrace,
petrcermak 2015/12/04 19:13:20 Thought: Is it worth keeping copying backtrace/type_name here?
Ruud van Asseldonk 2015/12/07 13:53:59 I did not measure, but I think it is. Instead of u...
+                property == &Part::type_name ? type_name.next() : type_name,
+                size);
+  }
+
+  Slice backtrace;
+  Slice type_name;
+  size_t size;
+};
+
+// Group of parts where the [begin, cursor) range is equal for all properties
+// of all parts in the bucket.
+struct Bucket {
+  Bucket() : size(0) {}
+
+  std::vector<Part> parts;
+
+  // The sum of the sizes of the parts.
+  size_t size;
+};
+
+bool operator<(const Bucket& lhs, const Bucket& rhs) {
+  return lhs.size < rhs.size;
 }
 
-// Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is
-// ordered from high |size_t| to low |size_t|.
-template <typename T>
-std::vector<std::pair<T, size_t>> SortBySizeDescending(
-    const hash_map<T, size_t>& grouped) {
-  std::vector<std::pair<T, size_t>> sorted;
-  sorted.reserve(grouped.size());
-  std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted));
-  std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>);
-  return sorted;
+// Groups the list of heap parts by the value under the cursor of the member
+// pointed at by |groupBy|. The parts in the returned list will have the cursor
+// advanced for the member pointed at by |groupBy|.
+std::vector<Bucket> GroupBy(const std::vector<Part>& parts, Property groupBy) {
+  base::hash_map<const char*, Bucket> grouped;
+
+  for (const Part& part : parts) {
+    Slice property = part.*groupBy;
+    if (property.cursor != property.end) {
+      Bucket& bucket = grouped[*(property.cursor)];
+      bucket.size += part.size;
+      bucket.parts.push_back(part.advance(groupBy));
+      DCHECK_GT(bucket.size, 0u);
+    }
+  }
+
+  std::vector<Bucket> buckets;
+  buckets.reserve(grouped.size());
+  for (auto key_bucket : grouped)
+    buckets.push_back(key_bucket.second);
+
+  return buckets;
petrcermak 2015/12/04 19:13:20 Are move semantics already allowed in Chrome? If they are, you could std::move this.
Ruud van Asseldonk 2015/12/07 13:53:59 Even before C++11, compilers would do Return Value Optimization here, so a move should not be needed.
+}
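A self-contained analog of the grouping step above, with std::unordered_map standing in for base::hash_map and made-up data (a sketch, not the CL's code):

  #include <iostream>
  #include <string>
  #include <unordered_map>
  #include <vector>

  int main() {
    // Each "part" is a path of frames; group by the frame under the cursor
    // (here simply the first element) and advance the cursor by one.
    std::vector<std::vector<std::string>> parts = {
        {"main", "f", "g"}, {"main", "f", "h"}, {"start", "i"}};

    std::unordered_map<std::string, std::vector<std::vector<std::string>>>
        grouped;
    for (const auto& part : parts)
      if (!part.empty())
        grouped[part.front()].emplace_back(part.begin() + 1, part.end());

    for (const auto& kv : grouped)
      std::cout << kv.first << ": " << kv.second.size() << " part(s)\n";
  }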
+
+// Breaks down the bucket by the member pointed at by |property|. Returns only
+// buckets that contribute significantly to the total size. The long tail is
+// omitted.
+std::vector<Bucket> BreakDownBy(const Bucket& bucket, Property property) {
+  std::vector<Bucket> buckets = GroupBy(bucket.parts, property);
+
+  // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
+  // so its front contains the largest bucket. Buckets should be iterated
+  // ordered by size, but sorting the vector is overkill because the long tail
+  // of small buckets will be discarded. By using a max-heap, the optimal case
+  // where all but the first bucket are discarded is O(n). The worst case where
+  // no bucket is discarded is doing a heap sort, which is O(n log n).
+  std::make_heap(buckets.begin(), buckets.end());
+
+  // Keep including parts until adding a part would increase the number of
+  // bytes accounted for by less than a percent. The large buckets end up in
+  // [it, end()), [begin(), it) is the part that contains the max-heap of small
+  // buckets. TODO(ruuda): tweak the heuristic.
+  size_t accounted_for = 0;
+  std::vector<Bucket>::iterator it;
+  for (it = buckets.end(); it != buckets.begin(); --it) {
+    // Compute contribution to number of bytes accounted for in percent, rounded
+    // down due to integer division. Buckets are iterated by descending size,
+    // so later buckets cannot have a larger contribution than this one.
+    accounted_for += buckets.front().size;
+    size_t contribution = buckets.front().size * 100 / accounted_for;
+    if (contribution == 0)
+      break;
+
+    // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
+    // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
+    std::pop_heap(buckets.begin(), it);
+  }
+
+  // Discard the long tail of buckets that contribute less than a percent.
+  buckets.erase(buckets.begin(), it);
+
+  return buckets;
petrcermak 2015/12/04 19:13:20 ditto
Ruud van Asseldonk 2015/12/07 13:53:59 See above.
 }
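The heap-based selection can be tried in isolation; a minimal sketch of the same make_heap/pop_heap loop over made-up sizes (not the CL's types):

  #include <algorithm>
  #include <iostream>
  #include <vector>

  int main() {
    std::vector<size_t> sizes = {500, 300, 120, 4, 3, 2, 1};
    std::make_heap(sizes.begin(), sizes.end());

    size_t accounted_for = 0;
    auto it = sizes.end();
    for (; it != sizes.begin(); --it) {
      accounted_for += sizes.front();
      if (sizes.front() * 100 / accounted_for == 0)
        break;  // The largest remaining element contributes less than 1%.
      std::pop_heap(sizes.begin(), it);  // Move the front to it - 1.
    }
    sizes.erase(sizes.begin(), it);  // Drop the long tail.

    for (size_t s : sizes)
      std::cout << s << " ";  // Prints "120 300 500", ascending by size.
  }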
 
 }  // namespace
 
 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
                                TypeNameDeduplicator* type_name_deduplicator)
-    : traced_value_(new TracedValue()),
-      stack_frame_deduplicator_(stack_frame_deduplicator),
+    : stack_frame_deduplicator_(stack_frame_deduplicator),
       type_name_deduplicator_(type_name_deduplicator) {}
 
 HeapDumpWriter::~HeapDumpWriter() {}
 
 void HeapDumpWriter::InsertAllocation(const AllocationContext& context,
                                       size_t size) {
   bytes_by_context_[context] += size;
 }
 
-scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() {
-  // Group by backtrace and by type ID, and compute the total heap size while
-  // iterating anyway.
-  size_t total_size = 0;
-  hash_map<Backtrace, size_t> bytes_by_backtrace;
-  hash_map<const char*, size_t> bytes_by_type;
-
-  for (auto context_size : bytes_by_context_) {
-    total_size += context_size.second;
-    bytes_by_backtrace[context_size.first.backtrace] += context_size.second;
-    bytes_by_type[context_size.first.type_name] += context_size.second;
+bool HeapDumpWriter::InsertEntry(const Bucket& bucket) {
+  // The parts in the bucket are all different, but for all properties, the
+  // [begin, cursor) range is equal for all parts in the bucket.
+  DCHECK(!bucket.parts.empty());
+  Slice backtrace = bucket.parts.front().backtrace;
+  Slice type_name = bucket.parts.front().type_name;
+
+  Entry entry;
+  entry.stack_frame_id =
+      stack_frame_deduplicator_->Insert(backtrace.begin, backtrace.cursor);
+
+  // If the type name is not set use ID -1, otherwise deduplicate the type name.
petrcermak 2015/12/04 19:13:20 supernit: I'd say "ID = -1" (multiple uses in this file).
Ruud van Asseldonk 2015/12/07 13:53:59 Ignored.
+  entry.type_id = (type_name.begin == type_name.cursor)
+                      ? -1
+                      : type_name_deduplicator_->Insert(*type_name.begin);
+
+  entry.size = bucket.size;
+
+  auto position_and_inserted = entries_.insert(entry);
+  return position_and_inserted.second;
+}
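The return value comes from |std::set::insert|, which returns an (iterator, bool) pair; the bool is what drives the recursion cut-off in |BreakDown| below. A tiny standalone check:

  #include <iostream>
  #include <set>

  int main() {
    std::set<int> s;
    std::cout << s.insert(42).second;  // 1: newly inserted
    std::cout << s.insert(42).second;  // 0: already present
  }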
+
+void HeapDumpWriter::BreakDown(const Bucket& bucket) {
+  std::vector<Bucket> by_backtrace = BreakDownBy(bucket, &Part::backtrace);
+  std::vector<Bucket> by_type_name = BreakDownBy(bucket, &Part::type_name);
+
+  // Insert entries for the buckets. If a bucket was not present before, it has
+  // not been broken down before, so recursively continue breaking down in that
+  // case. There might be multiple routes to the same entry (first break down
+  // by type name, then by backtrace, or first by backtrace and then by type),
+  // so a set is used to avoid dumping and breaking down entries more than once.
+
+  for (const Bucket& subbucket : by_backtrace)
+    if (InsertEntry(subbucket))
+      BreakDown(subbucket);
+
+  for (const Bucket& subbucket : by_type_name)
+    if (InsertEntry(subbucket))
+      BreakDown(subbucket);
+}
+
+const std::set<Entry>& HeapDumpWriter::Dump() {
+  // Start with one bucket that represents the entire heap. Iterate by
+  // reference, because the heap parts are going to point into allocation
+  // contexts stored in |bytes_by_context_|.
+  Bucket root_bucket;
+  for (const auto& context_and_size : bytes_by_context_) {
+    const AllocationContext& context = context_and_size.first;
+    const size_t size = context_and_size.second;
+    root_bucket.parts.push_back(Part(context, size));
+    root_bucket.size += size;
   }
 
-  auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace);
-  auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type);
+  InsertEntry(root_bucket);
 
-  traced_value_->BeginArray("entries");
+  // Recursively break down the heap and fill |entries_| with entries to dump.
+  BreakDown(root_bucket);
 
-  // The global size, no column specified.
-  {
-    traced_value_->BeginDictionary();
-    WriteSize(total_size);
-    traced_value_->EndDictionary();
+  return entries_;
+}
+
+// static
+scoped_refptr<TracedValue> HeapDumpWriter::Write(
+    const std::set<Entry>& entries) {
+  std::string buffer;
+  scoped_refptr<TracedValue> traced_value = new TracedValue;
+
+  traced_value->BeginArray("entries");
+
+  for (const Entry& entry : entries) {
+    traced_value->BeginDictionary();
+
+    // Format size as hexadecimal string into |buffer|.
+    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
+    traced_value->SetString("size", buffer);
+
+    if (entry.stack_frame_id == -1) {
+      // An empty backtrace (which will have ID -1) is represented by the empty
+      // string, because there is no leaf frame to reference in |stackFrames|.
+      traced_value->SetString("bt", "");
+    } else {
+      // Format index of the leaf frame as a string, because |stackFrames| is a
+      // dictionary, not an array.
+      SStringPrintf(&buffer, "%i", entry.stack_frame_id);
+      traced_value->SetString("bt", buffer);
+    }
+
+    // Type ID -1 (cumulative size for all types) is represented by the absence
+    // of the "type" key in the dictionary.
+    if (entry.type_id != -1) {
+      // Format the type ID as a string.
+      SStringPrintf(&buffer, "%i", entry.type_id);
+      traced_value->SetString("type", buffer);
+    }
+
+    traced_value->EndDictionary();
   }
 
-  // Entries with the size per backtrace.
-  for (const auto& entry : sorted_bytes_by_backtrace) {
-    traced_value_->BeginDictionary();
-    // Insert a forward reference to the backtrace that will be written to the
-    // |stackFrames| dictionary later on.
-    int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames),
-                                                std::end(entry.first.frames));
-    WriteStackFrameIndex(idx);
-    WriteSize(entry.second);
-    traced_value_->EndDictionary();
-  }
-
-  // Entries with the size per type.
-  for (const auto& entry : sorted_bytes_by_type) {
-    traced_value_->BeginDictionary();
-    // Insert a forward reference to the type name that will be written to the
-    // trace when it is flushed.
-    WriteTypeId(type_name_deduplicator_->Insert(entry.first));
-    WriteSize(entry.second);
-    traced_value_->EndDictionary();
-  }
-
-  traced_value_->EndArray(); // "entries"
-
-  return traced_value_;
+  traced_value->EndArray();  // "entries"
+  return traced_value;
 }
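For orientation, the emitted trace fragment looks roughly like this (a hand-made illustration; the IDs and hex sizes are invented, and the |stackFrames| and type-name maps are written elsewhere). Note that "bt" is always present, empty for ID -1, while "type" is omitted for ID -1:

  "entries": [
    { "size": "1e", "bt": "" },               // root: 0x1e bytes in total
    { "size": "14", "bt": "7" },              // 0x14 bytes for one backtrace
    { "size": "a",  "bt": "",  "type": "3" }  // 0xa bytes for one type
  ]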
 
-void HeapDumpWriter::WriteStackFrameIndex(int index) {
-  if (index == -1) {
-    // An empty backtrace (which will have index -1) is represented by the empty
-    // string, because there is no leaf frame to reference in |stackFrames|.
-    traced_value_->SetString("bt", "");
-  } else {
-    // Format index of the leaf frame as a string, because |stackFrames| is a
-    // dictionary, not an array.
-    SStringPrintf(&buffer_, "%i", index);
-    traced_value_->SetString("bt", buffer_);
-  }
-}
-
-void HeapDumpWriter::WriteTypeId(int type_id) {
-  // Format the type ID as a string.
-  SStringPrintf(&buffer_, "%i", type_id);
-  traced_value_->SetString("type", buffer_);
-}
-
-void HeapDumpWriter::WriteSize(size_t size) {
-  // Format size as hexadecimal string into |buffer_|.
-  SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size));
-  traced_value_->SetString("size", buffer_);
+bool operator<(Entry lhs, Entry rhs) {
+  // There is no need to compare |size|. If the backtrace and type name are
+  // equal then the sizes must be equal as well.
+  return std::tie(lhs.stack_frame_id, lhs.type_id) <
+         std::tie(rhs.stack_frame_id, rhs.type_id);
 }
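The |std::tie| trick gives lexicographic ordering over the two IDs, which is what |std::set<Entry>| needs. A standalone sketch with a hypothetical struct:

  #include <iostream>
  #include <tuple>

  struct Key { int a; int b; };

  bool operator<(const Key& lhs, const Key& rhs) {
    // Compares |a| first; |b| breaks ties, just like the Entry comparison.
    return std::tie(lhs.a, lhs.b) < std::tie(rhs.a, rhs.b);
  }

  int main() {
    std::cout << (Key{1, 2} < Key{1, 3}) << "\n";  // prints 1
    std::cout << (Key{2, 0} < Key{1, 9}) << "\n";  // prints 0
  }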
 
 }  // namespace trace_event
 }  // namespace base
