base/trace_event/heap_profiler_heap_dump_writer.cc - Issue 1494293005: [Tracing] Enable heap dumps with both type info and backtraces

Side by Side Diff: base/trace_event/heap_profiler_heap_dump_writer.cc

Issue 1494293005: [Tracing] Enable heap dumps with both type info and backtraces (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@sfdedup-iter

Patch Set: IWYU Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« base/trace_event/heap_profiler_heap_dump_writer.h ('K') | « base/trace_event/heap_profiler_heap_dump_writer.h ('k') | base/trace_event/heap_profiler_heap_dump_writer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/trace_event/heap_profiler_heap_dump_writer.h"	5 #include "base/trace_event/heap_profiler_heap_dump_writer.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <iterator>	8 #include <iterator>

	9 #include <set>
	Primiano Tucci (use gerrit) 2015/12/09 17:04:00 remove, you have this in the .h remove, you have this in the .h Ruud van Asseldonk 2015/12/09 20:58:04 Done. Show quoted text On 2015/12/09 17:04:00, Primiano Tucci wrote: > remove, you have this in the .h Done.
	10 #include <tuple>
	Primiano Tucci (use gerrit) 2015/12/09 17:03:59 I think this is not needed anymore I think this is not needed anymore Ruud van Asseldonk 2015/12/09 20:58:04 It is required for \|std::tie\| which is required fo Show quoted text On 2015/12/09 17:03:59, Primiano Tucci wrote: > I think this is not needed anymore It is required for \|std::tie\| which is required for \|operator<\|.
9 #include <utility>	11 #include <utility>

10 #include <vector>	12 #include <vector>

11	13

	14 #include "base/containers/hash_tables.h"

12 #include "base/format_macros.h"	15 #include "base/format_macros.h"

	16 #include "base/logging.h"

	17 #include "base/macros.h"
	Primiano Tucci (use gerrit) 2015/12/09 17:03:59 remove, you have this in the .h remove, you have this in the .h Ruud van Asseldonk 2015/12/09 20:58:05 Done. Show quoted text On 2015/12/09 17:03:59, Primiano Tucci wrote: > remove, you have this in the .h Done.
13 #include "base/strings/stringprintf.h"	18 #include "base/strings/stringprintf.h"

	19 #include "base/trace_event/heap_profiler_allocation_context.h"

14 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"	20 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"

15 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"	21 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"

16 #include "base/trace_event/trace_event_argument.h"	22 #include "base/trace_event/trace_event_argument.h"

17	23

	24 // Most of what the \|HeapDumpWriter\| does is aggregating detailed information

	25 // about the heap and deciding what to dump. The Input to this process is a list

	26 // of \|AllocationContext\|s and size pairs.

	27 //

	28 // The pairs are grouped into \|Bucket\|s. A bucket is a group of (context, size)

	29 // pairs where the properties of the contexts share a prefix. (Type name is

	30 // considered a list of length one here.) First all pairs are put into one

	31 // bucket that represents the entire heap. Then this bucket is recursively

	32 // broken down into smaller buckets. Each bucket keeps track of whether further

	33 // breakdown is possible.

	34

18 namespace base {	35 namespace base {

19 namespace trace_event {	36 namespace trace_event {

20	37 namespace internal {

21 namespace {	38 namespace {

22	39

23 template <typename T>	40 // Denotes a property of \|AllocationContext\| to break down by.

24 bool PairSizeGt(const std::pair<T, size_t>& lhs,	41 enum class BreakDownMode { kByBacktrace, kByTypeName };

25 const std::pair<T, size_t>& rhs) {	42

26 return lhs.second > rhs.second;	43 // A group of bytes for which the context shares a prefix.

27 }	44 struct BASE_EXPORT Bucket {
	Primiano Tucci (use gerrit) 2015/12/09 17:03:59 don't think this needs BASE_EXPORT don't think this needs BASE_EXPORT Ruud van Asseldonk 2015/12/09 20:58:05 You are right. Fixed. Show quoted text On 2015/12/09 17:03:59, Primiano Tucci wrote: > don't think this needs BASE_EXPORT You are right. Fixed.
28	45 Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {}

29 // Converts a \|hash_map<T, size_t>\| into a vector of (T, size_t) pairs that is	46

30 // ordered from high \|size_t\| to low \|size_t\|.	47 std::vector<std::pair<const AllocationContext*, size_t>> bytes_by_context;

31 template <typename T>	48

32 std::vector<std::pair<T, size_t>> SortBySizeDescending(	49 // The sum of the sizes of \|bytes_by_context\|.

33 const hash_map<T, size_t>& grouped) {	50 size_t size;

34 std::vector<std::pair<T, size_t>> sorted;	51

35 sorted.reserve(grouped.size());	52 // The index of the stack frame that has not yet been broken down by. For all

36 std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted));	53 // elements in this bucket, the stack frames 0 up to (but not including) the

37 std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>);	54 // cursor, must be equal.

38 return sorted;	55 size_t backtrace_cursor;

	56

	57 // When true, the type name for all elements in this bucket must be equal.

	58 bool is_broken_down_by_type_name;

	59 };

	60

	61 // Comparison operator to order buckets by their size.

	62 bool operator<(const Bucket& lhs, const Bucket& rhs) {

	63 return lhs.size < rhs.size;

	64 }

	65

	66 // Groups the allocations in the bucket by \|breakBy\|. The buckets in the

	67 // returned list will have \|backtrace_cursor\| advanced or

	68 // \|is_broken_down_by_type_name\| set depending on the property to group by.

	69 std::vector<Bucket> GetSubbuckets(const Bucket& bucket, BreakDownMode breakBy) {

	70 base::hash_map<const char*, Bucket> breakdown;

	71

	72 if (breakBy == BreakDownMode::kByBacktrace) {

	73 for (const auto& context_and_size : bucket.bytes_by_context) {

	74 const Backtrace& backtrace = context_and_size.first->backtrace;

	75 const char* const* begin = std::begin(backtrace.frames);

	76 const char* const* end = std::end(backtrace.frames);

	77 const char* const* cursor = begin + bucket.backtrace_cursor;

	78

	79 // The backtrace in the context is padded with null pointers, but these

	80 // should not be considered for breakdown. Adjust end to point past the

	81 // last non-null frame.

	82 while (begin != end && *(end - 1) == nullptr)

	83 end--;

	84

	85 DCHECK_LE(cursor, end);

	86

	87 if (cursor != end) {

	88 Bucket& subbucket = breakdown[*cursor];

	89 subbucket.size += context_and_size.second;

	90 subbucket.bytes_by_context.push_back(context_and_size);

	91 subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;

	92 subbucket.is_broken_down_by_type_name =

	93 bucket.is_broken_down_by_type_name;

	94 DCHECK_GT(subbucket.size, 0u);

	95 }

	96 }

	97 } else if (breakBy == BreakDownMode::kByTypeName) {

	98 if (!bucket.is_broken_down_by_type_name) {

	99 for (const auto& context_and_size : bucket.bytes_by_context) {

	100 const AllocationContext* context = context_and_size.first;

	101 Bucket& subbucket = breakdown[context->type_name];

	102 subbucket.size += context_and_size.second;

	103 subbucket.bytes_by_context.push_back(context_and_size);

	104 subbucket.backtrace_cursor = bucket.backtrace_cursor;

	105 subbucket.is_broken_down_by_type_name = true;

	106 DCHECK_GT(subbucket.size, 0u);

	107 }

	108 }

	109 }

	110

	111 std::vector<Bucket> buckets;

	112 buckets.reserve(breakdown.size());

	113 for (auto key_bucket : breakdown)

	114 buckets.push_back(key_bucket.second);

	115

	116 return buckets;

	117 }

	118

	119 // Breaks down the bucket by \|breakBy\|. Returns only buckets that contribute

	120 // significantly to the total size. The long tail is omitted.

	121 std::vector<Bucket> BreakDownBy(const Bucket& bucket, BreakDownMode breakBy) {

	122 std::vector<Bucket> buckets = GetSubbuckets(bucket, breakBy);

	123

	124 // Ensure that \|buckets\| is a max-heap (the data structure, not memory heap),

	125 // so its front contains the largest bucket. Buckets should be iterated

	126 // ordered by size, but sorting the vector is overkill because the long tail

	127 // of small buckets will be discarded. By using a max-heap, the optimal case

	128 // where all but the first bucket are discarded is O(n). The worst case where

	129 // no bucket is discarded is doing a heap sort, which is O(n log n).

	130 std::make_heap(buckets.begin(), buckets.end());

	131

	132 // Keep including buckets until adding one would increase the number of

	133 // bytes accounted for by less than a percent. The large buckets end up in

	134 // [it, end()), [begin(), it) is the part that contains the max-heap of small

	135 // buckets. TODO(ruuda): tweak the heuristic.

	136 size_t accounted_for = 0;

	137 std::vector<Bucket>::iterator it;

	138 for (it = buckets.end(); it != buckets.begin(); --it) {

	139 // Compute contribution to number of bytes accounted for in percent, rounded

	140 // down due to integer division. Buckets are iterated by descending size,

	141 // so later buckets cannot have a larger contribution than this one.

	142 accounted_for += buckets.front().size;

	143 size_t contribution = buckets.front().size * 100 / accounted_for;

	144 if (contribution == 0)

	145 break;

	146

	147 // Put the largest bucket in [begin, it) at \|it - 1\| and max-heapify

	148 // [begin, it - 1). This puts the next largest bucket at \|buckets.front()\|.

	149 std::pop_heap(buckets.begin(), it);

	150 }

	151

	152 // At this point, \|buckets\| looks like this (numbers are bucket sizes):

	153 //

	154 // <-- max-heap of small buckets --->

	155 // <-- large buckets by ascending size -->

	156 // [ 19 \| 11 \| 13 \| 7 \| 2 \| 5 \| ... \| 83 \| 89 \| 97 ]

	157 // ^ ^ ^

	158 // \| \| \|

	159 // begin() it end()

	160

	161 // Discard the long tail of buckets that contribute less than a percent.

	162 buckets.erase(buckets.begin(), it);
	Primiano Tucci (use gerrit) 2015/12/09 17:04:00 I wonder whether resize() here is more efficient o I wonder whether resize() here is more efficient or erase(begin()) is smart enough. (just curious, non need to change the code) Ruud van Asseldonk 2015/12/09 20:58:04 Resize discards elements at the end, I want to dis Show quoted text On 2015/12/09 17:04:00, Primiano Tucci wrote: > I wonder whether resize() here is more efficient or erase(begin()) is smart > enough. (just curious, non need to change the code) Resize discards elements at the end, I want to discard elements at the beginning. I hope that \|erase\| is smart enough to move every element at most once but I did not check this. I considered using a \|deque\| instead for efficient removal from the front, but \|deque\| does not have \|reserve\|. Either way we shouln’t optimise prematurely.
	163

	164 return buckets;

39 }	165 }

40	166

41 } // namespace	167 } // namespace

42	168

	169 bool operator<(Entry lhs, Entry rhs) {

	170 // There is no need to compare \|size\|. If the backtrace and type name are

	171 // equal then the sizes must be equal as well.

	172 return std::tie(lhs.stack_frame_id, lhs.type_id) <

	173 std::tie(rhs.stack_frame_id, rhs.type_id);

	174 }

	175

43 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,	176 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,

44 TypeNameDeduplicator* type_name_deduplicator)	177 TypeNameDeduplicator* type_name_deduplicator)

45 : traced_value_(new TracedValue()),	178 : stack_frame_deduplicator_(stack_frame_deduplicator),

46 stack_frame_deduplicator_(stack_frame_deduplicator),

47 type_name_deduplicator_(type_name_deduplicator) {}	179 type_name_deduplicator_(type_name_deduplicator) {}

48	180

49 HeapDumpWriter::~HeapDumpWriter() {}	181 HeapDumpWriter::~HeapDumpWriter() {}

50	182

51 void HeapDumpWriter::InsertAllocation(const AllocationContext& context,	183 bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {

52 size_t size) {	184 // The contexts in the bucket are all different, but the [begin, cursor) range

53 bytes_by_context_[context] += size;	185 // is equal for all contexts in the bucket, and the type names are the same if

54 }	186 // \|is_broken_down_by_type_name\| is set.

55	187 DCHECK(!bucket.bytes_by_context.empty());

56 scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() {	188

57 // Group by backtrace and by type ID, and compute the total heap size while	189 const AllocationContext* context = bucket.bytes_by_context.front().first;

58 // iterating anyway.	190

59 size_t total_size = 0;	191 const char* const* backtrace_begin = std::begin(context->backtrace.frames);

60 hash_map<Backtrace, size_t> bytes_by_backtrace;	192 const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor;

61 hash_map<const char*, size_t> bytes_by_type;	193 DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

62	194

63 for (auto context_size : bytes_by_context_) {	195 Entry entry;

64 total_size += context_size.second;	196 entry.stack_frame_id =

65 bytes_by_backtrace[context_size.first.backtrace] += context_size.second;	197 stack_frame_deduplicator_->Insert(backtrace_begin, backtrace_end);

66 bytes_by_type[context_size.first.type_name] += context_size.second;	198

67 }	199 // Deduplicate the type name, or use ID -1 if type name is not set.

68	200 entry.type_id = bucket.is_broken_down_by_type_name

69 auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace);	201 ? type_name_deduplicator_->Insert(context->type_name)

70 auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type);	202 : -1;

71	203

72 traced_value_->BeginArray("entries");	204 entry.size = bucket.size;

73	205

74 // The global size, no column specified.	206 auto position_and_inserted = entries_.insert(entry);

75 {	207 return position_and_inserted.second;

76 traced_value_->BeginDictionary();	208 }

77 WriteSize(total_size);	209

78 traced_value_->EndDictionary();	210 void HeapDumpWriter::BreakDown(const Bucket& bucket) {

79 }	211 auto by_backtrace = BreakDownBy(bucket, BreakDownMode::kByBacktrace);

80	212 auto by_type_name = BreakDownBy(bucket, BreakDownMode::kByTypeName);

81 // Entries with the size per backtrace.	213

82 for (const auto& entry : sorted_bytes_by_backtrace) {	214 // Insert entries for the buckets. If a bucket was not present before, it has

83 traced_value_->BeginDictionary();	215 // not been broken down before, so recursively continue breaking down in that

84 // Insert a forward reference to the backtrace that will be written to the	216 // case. There might be multiple routes to the same entry (first break down

85 // \|stackFrames\| dictionary later on.	217 // by type name, then by backtrace, or first by backtrace and then by type),

86 int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames),	218 // so a set is used to avoid dumping and breaking down entries more than once.

87 std::end(entry.first.frames));	219

88 WriteStackFrameIndex(idx);	220 for (const Bucket& subbucket : by_backtrace)

89 WriteSize(entry.second);	221 if (AddEntryForBucket(subbucket))

90 traced_value_->EndDictionary();	222 BreakDown(subbucket);
	Ruud van Asseldonk 2015/12/09 16:16:01 I prefer to keep the call to \|BreakDown\| here inst I prefer to keep the call to \|BreakDown\| here instead of moving it into \|AddEntryForBucket\|, because then this function is recursive directly. If we move it into \|AddEntryForBucket\|, then there is the \|BreakDown\| -> \|AddEntryForBucket\| -> \|BreakDown\| -> \|AddEntryForBucket\| -> ... recursion, and it might be harder to see what is going on. I did rename \|InsertEntry\| to \|AddEntryForBucket\|. Primiano Tucci (use gerrit) 2015/12/09 17:04:00 Acknowledged. Show quoted text On 2015/12/09 16:16:01, Ruud van Asseldonk wrote: > I prefer to keep the call to \|BreakDown\| here instead of moving it into > \|AddEntryForBucket\|, because then this function is recursive directly. If we > move it into \|AddEntryForBucket\|, then there is the \|BreakDown\| -> > \|AddEntryForBucket\| -> \|BreakDown\| -> \|AddEntryForBucket\| -> ... recursion, and > it might be harder to see what is going on. > > I did rename \|InsertEntry\| to \|AddEntryForBucket\|. Acknowledged.
91 }	223

92	224 for (const Bucket& subbucket : by_type_name)

93 // Entries with the size per type.	225 if (AddEntryForBucket(subbucket))

94 for (const auto& entry : sorted_bytes_by_type) {	226 BreakDown(subbucket);

95 traced_value_->BeginDictionary();	227 }

96 // Insert a forward reference to the type name that will be written to the	228

97 // trace when it is flushed.	229 const std::set<Entry>& HeapDumpWriter::Dump(
	Primiano Tucci (use gerrit) 2015/12/09 17:04:00 I wonder if we can get a better name here. I think I wonder if we can get a better name here. I think this is really something like "Reduce" or "SummarizeAndCluster" or something like that. Dump is a bit generic. I think the key thing you want to convey in the name is that here you are doing some smart bucketing + threshold .
98 WriteTypeId(type_name_deduplicator_->Insert(entry.first));	230 const hash_map<AllocationContext, size_t>& bytes_by_context) {

99 WriteSize(entry.second);	231 // Start with one bucket that represents the entire heap. Iterate by

100 traced_value_->EndDictionary();	232 // reference, because the allocation contexts are going to point to allocation

101 }	233 // contexts stored in \|bytes_by_context\|.

102	234 Bucket root_bucket;

103 traced_value_->EndArray(); // "entries"	235 for (const auto& context_and_size : bytes_by_context) {

104	236 const AllocationContext* context = &context_and_size.first;

105 return traced_value_;	237 const size_t size = context_and_size.second;

106 }	238 root_bucket.bytes_by_context.push_back(std::make_pair(context, size));

107	239 root_bucket.size += size;

108 void HeapDumpWriter::WriteStackFrameIndex(int index) {	240 }

109 if (index == -1) {	241

110 // An empty backtrace (which will have index -1) is represented by the empty	242 AddEntryForBucket(root_bucket);

111 // string, because there is no leaf frame to reference in \|stackFrames\|.	243

112 traced_value_->SetString("bt", "");	244 // Recursively break down the heap and fill \|entries_\| with entries to dump.

113 } else {	245 BreakDown(root_bucket);

114 // Format index of the leaf frame as a string, because \|stackFrames\| is a	246

115 // dictionary, not an array.	247 return entries_;

116 SStringPrintf(&buffer_, "%i", index);	248 }

117 traced_value_->SetString("bt", buffer_);	249

118 }	250 scoped_refptr<TracedValue> Write(const std::set<Entry>& entries) {

119 }	251 std::string buffer;

120	252 scoped_refptr<TracedValue> traced_value = new TracedValue;

121 void HeapDumpWriter::WriteTypeId(int type_id) {	253

122 // Format the type ID as a string.	254 traced_value->BeginArray("entries");

123 SStringPrintf(&buffer_, "%i", type_id);	255

124 traced_value_->SetString("type", buffer_);	256 for (const Entry& entry : entries) {

125 }	257 traced_value->BeginDictionary();

126	258

127 void HeapDumpWriter::WriteSize(size_t size) {	259 // Format size as hexadecimal string into \|buffer\|.

128 // Format size as hexadecimal string into \|buffer_\|.	260 SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));

129 SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size));	261 traced_value->SetString("size", buffer);

130 traced_value_->SetString("size", buffer_);	262

131 }	263 if (entry.stack_frame_id == -1) {

	264 // An empty backtrace (which will have ID -1) is represented by the empty

	265 // string, because there is no leaf frame to reference in \|stackFrames\|.

	266 traced_value->SetString("bt", "");

	267 } else {

	268 // Format index of the leaf frame as a string, because \|stackFrames\| is a

	269 // dictionary, not an array.

	270 SStringPrintf(&buffer, "%i", entry.stack_frame_id);

	271 traced_value->SetString("bt", buffer);

	272 }

	273

	274 // Type ID -1 (cumulative size for all types) is represented by the absence

	275 // of the "type" key in the dictionary.

	276 if (entry.type_id != -1) {

	277 // Format the type ID as a string.

	278 SStringPrintf(&buffer, "%i", entry.type_id);

	279 traced_value->SetString("type", buffer);

	280 }

	281

	282 traced_value->EndDictionary();

	283 }

	284

	285 traced_value->EndArray(); // "entries"

	286 return traced_value;

	287 }

	288

	289 } // namespace internal

132	290

133 } // namespace trace_event	291 } // namespace trace_event

134 } // namespace base	292 } // namespace base

OLD	NEW