base/trace_event/heap_profiler_heap_dump_writer.cc - Issue 1494293005: [Tracing] Enable heap dumps with both type info and backtraces

Side by Side Diff: base/trace_event/heap_profiler_heap_dump_writer.cc

Issue 1494293005: [Tracing] Enable heap dumps with both type info and backtraces (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@sfdedup-iter

Patch Set: Address most primiano comments Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/trace_event/heap_profiler_heap_dump_writer.h"	5 #include "base/trace_event/heap_profiler_heap_dump_writer.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <iterator>	8 #include <iterator>

9 #include <utility>	9 #include <tuple>

10 #include <vector>	10 #include <vector>

11	11

	12 #include "base/containers/hash_tables.h"

12 #include "base/format_macros.h"	13 #include "base/format_macros.h"

	14 #include "base/logging.h"

	15 #include "base/macros.h"

13 #include "base/strings/stringprintf.h"	16 #include "base/strings/stringprintf.h"

	17 #include "base/trace_event/heap_profiler_allocation_context.h"

14 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"	18 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"

15 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"	19 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"

16 #include "base/trace_event/trace_event_argument.h"	20 #include "base/trace_event/trace_event_argument.h"

17	21

	22 // Most of what this file does is aggregating detailed information about the

	23 // heap and deciding what to dump. Input to this process is a list of
	petrcermak 2015/12/09 15:02:31 nit: I think there should be "The" in front of "In nit: I think there should be "The" in front of "Input" Ruud van Asseldonk 2015/12/09 16:16:01 Done. Show quoted text On 2015/12/09 15:02:31, petrcermak wrote: > nit: I think there should be "The" in front of "Input" Done.
	24 // \|AllocationContext\|s and size pairs.

	25 //

	26 // The pairs are grouped into \|Bucket\|s. A bucket is a group of contexts with

	27 // sizes of which the properties share a prefix. (Type name is considered a
	petrcermak 2015/12/09 15:02:31 hnit: It sounds like you're talking about "propert hnit: It sounds like you're talking about "properties of sizes" here, but I think you want to say "properties of contexts". Not sure how to avoid splitting this into two sentences. Ruud van Asseldonk 2015/12/09 16:16:01 Done. Show quoted text On 2015/12/09 15:02:31, petrcermak wrote: > hnit: It sounds like you're talking about "properties of sizes" here, but I > think you want to say "properties of contexts". Not sure how to avoid splitting > this into two sentences. Done.
	28 // list of length one here.) First all pairs are put into one bucket that

	29 // represents the entire heap. Then this bucket is recursively broken down into

	30 // smaller buckets. Each bucket keeps track of whether further breakdown is

	31 // possible.

	32

18 namespace base {	33 namespace base {

19 namespace trace_event {	34 namespace trace_event {

	35 namespace heap_dump {

20	36

21 namespace {	37 namespace {

22	38

23 template <typename T>	39 // Denotes a property of \|AllocationContext\|.

24 bool PairSizeGt(const std::pair<T, size_t>& lhs,	40 enum class BreakDownMode { kByBacktrace, kByTypeName };

25 const std::pair<T, size_t>& rhs) {	41

26 return lhs.second > rhs.second;	42 struct Bucket {

27 }	43 Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {}

28	44

29 // Converts a \|hash_map<T, size_t>\| into a vector of (T, size_t) pairs that is	45 // List of context and number of bytes allocated for that context.

30 // ordered from high \|size_t\| to low \|size_t\|.	46 std::vector<std::pair<const AllocationContext*, size_t>> allocations;

31 template <typename T>	47

32 std::vector<std::pair<T, size_t>> SortBySizeDescending(	48 // The sum of the sizes of the allocations.

33 const hash_map<T, size_t>& grouped) {	49 size_t size;

34 std::vector<std::pair<T, size_t>> sorted;	50

35 sorted.reserve(grouped.size());	51 // The index of the stack frame that has not yet been broken down by. For

36 std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted));	52 // all contexts in this bucket, the stack frames 0 up to (but not including)

37 std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>);	53 // the cursor, must be equal.

38 return sorted;	54 size_t backtrace_cursor;

	55

	56 // When true, the type name for all contexts in this bucket must be equal.

	57 bool is_broken_down_by_type_name;

	58 };

	59

	60 // Comparison operator used to order buckets by size so that buckets of

	61 // insignificant size might be discarded.

	62 bool operator<(const Bucket& lhs, const Bucket& rhs) {

	63 return lhs.size < rhs.size;

	64 }

	65

	66 // Groups the allocations in the bucket by \|breakDownBy\|. The buckets in the

	67 // returned list will have \|backtrace_cursor\| advanced or

	68 // \|is_broken_down_by_type_name\| set depending the property to group by.
	petrcermak 2015/12/09 15:02:32 nit: "depending ON" nit: "depending ON" Ruud van Asseldonk 2015/12/09 16:16:01 Done. Show quoted text On 2015/12/09 15:02:32, petrcermak wrote: > nit: "depending ON" Done.
	69 std::vector<Bucket> BreakDownBy(const Bucket& bucket,

	70 BreakDownMode breakDownBy) {

	71 base::hash_map<const char*, Bucket> breakdown;

	72

	73 if (breakDownBy == BreakDownMode::kByBacktrace) {
	petrcermak 2015/12/09 15:02:32 Wouldn't a switch make more sense here? Wouldn't a switch make more sense here? Ruud van Asseldonk 2015/12/09 16:16:01 I don’t have a strong opinion. I find a switch to Show quoted text On 2015/12/09 15:02:32, petrcermak wrote: > Wouldn't a switch make more sense here? I don’t have a strong opinion. I find a switch to be a bit awkward because you need the \|break\| and there is a block here anyway. An advantage of switch is that some compilers warn about non-exhaustive switch statements.
	74 for (const auto& context_and_size : bucket.allocations) {

	75 const AllocationContext* context = context_and_size.first;
	petrcermak 2015/12/09 15:02:31 You always use \|context\| to get the backtrace so y You always use \|context\| to get the backtrace so you might as well store context_and_size.first->backtrace straightaway. Ruud van Asseldonk 2015/12/09 16:16:01 Done. Show quoted text On 2015/12/09 15:02:31, petrcermak wrote: > You always use \|context\| to get the backtrace so you might as well store > context_and_size.first->backtrace straightaway. Done.
	76 const char* const* begin = std::begin(context->backtrace.frames);

	77 const char* const* end = std::end(context->backtrace.frames);

	78 const char* const* cursor = begin + bucket.backtrace_cursor;

	79

	80 // The backtrace in the context is padded with null pointers, but these

	81 // should not be considered for breakdown. Adjust end to point past the

	82 // last non-null frame.

	83 while (begin != end && *(end - 1) == nullptr)

	84 end--;

	85

	86 DCHECK_LE(cursor, end);

	87

	88 if (cursor != end) {

	89 Bucket& subbucket = breakdown[*cursor];

	90 subbucket.size += context_and_size.second;

	91 subbucket.allocations.push_back(context_and_size);

	92 subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;

	93 subbucket.is_broken_down_by_type_name =

	94 bucket.is_broken_down_by_type_name;

	95 DCHECK_GT(subbucket.size, 0u);

	96 }

	97 }

	98 } else if (breakDownBy == BreakDownMode::kByTypeName) {

	99 if (!bucket.is_broken_down_by_type_name) {

	100 for (const auto& context_and_size : bucket.allocations) {

	101 Bucket& subbucket = breakdown[context_and_size.first->type_name];

	102 subbucket.size += context_and_size.second;

	103 subbucket.allocations.push_back(context_and_size);

	104 subbucket.backtrace_cursor = bucket.backtrace_cursor;

	105 subbucket.is_broken_down_by_type_name = true;

	106 DCHECK_GT(subbucket.size, 0u);

	107 }

	108 }

	109 }

	110

	111 std::vector<Bucket> buckets;

	112 buckets.reserve(breakdown.size());

	113 for (auto key_bucket : breakdown)

	114 buckets.push_back(key_bucket.second);

	115

	116 return buckets;

	117 }

	118

	119 // Breaks down the bucket by \|property\|. Returns only buckets that contribute

	120 // significantly to the total size. The long tail is omitted.

	121 std::vector<Bucket> DiscardSmallBuckets(std::vector<Bucket> buckets) {

	122 // Ensure that \|buckets\| is a max-heap (the data structure, not memory heap),

	123 // so its front contains the largest bucket. Buckets should be iterated

	124 // ordered by size, but sorting the vector is overkill because the long tail

	125 // of small buckets will be discarded. By using a max-heap, the optimal case

	126 // where all but the first bucket are discarded is O(n). The worst case where

	127 // no bucket is discarded is doing a heap sort, which is O(n log n).

	128 std::make_heap(buckets.begin(), buckets.end());

	129

	130 // Keep including allocations until adding one would increase the number of

	131 // bytes accounted for by less than a percent. The large buckets end up in

	132 // [it, end()), [begin(), it) is the part that contains the max-heap of small

	133 // buckets. TODO(ruuda): tweak the heuristic.

	134 size_t accounted_for = 0;

	135 std::vector<Bucket>::iterator it;

	136 for (it = buckets.end(); it != buckets.begin(); --it) {

	137 // Compute contribution to number of bytes accounted for in percent, rounded

	138 // down due to integer division. Buckets are iterated by descending size,

	139 // so later buckets cannot have a larger contribution than this one.

	140 accounted_for += buckets.front().size;

	141 size_t contribution = buckets.front().size * 100 / accounted_for;

	142 if (contribution == 0)

	143 break;

	144

	145 // Put the largest bucket in [begin, it) at \|it - 1\| and max-heapify

	146 // [begin, it - 1). This puts the next largest bucket at \|buckets.front()\|.

	147 std::pop_heap(buckets.begin(), it);

	148 }

	149

	150 // At this point, \|buckets\| looks like this (numbers are bucket sizes):

	151 //

	152 // <-- max-heap of small buckets --->

	153 // <-- large buckets by descending size -->

	154 // [ 19 \| 11 \| 13 \| 7 \| 2 \| 5 \| ... \| 83 \| 89 \| 97 ]

	155 // ^ ^ ^

	156 // \| \| \|

	157 // begin() it end()

	158

	159 // Discard the long tail of buckets that contribute less than a percent.

	160 buckets.erase(buckets.begin(), it);

	161

	162 return buckets;

	163 }

	164

	165 // Inserts an entry for \|bucket\| into \|entries\|.

	166 bool InsertEntry(const Bucket& bucket,

	167 std::set<HeapEntry>* entries,

	168 StackFrameDeduplicator* stack_frame_deduplicator,

	169 TypeNameDeduplicator* type_name_deduplicator) {

	170 DCHECK(!bucket.allocations.empty());

	171

	172 // The contexts in the bucket are all different, but for all properties, the

	173 // [begin, cursor) range is equal for all contexts in the bucket.
	petrcermak 2015/12/09 15:02:32 This comment is outdated - it doesn't capture the This comment is outdated - it doesn't capture the fact that types IDs are not a list anymore. Ruud van Asseldonk 2015/12/09 16:16:01 Fixed. Show quoted text On 2015/12/09 15:02:32, petrcermak wrote: > This comment is outdated - it doesn't capture the fact that types IDs > are not a list anymore. Fixed.
	174 const AllocationContext& context = *bucket.allocations.front().first;

	175

	176 const char* const* backtrace_begin = std::begin(context.backtrace.frames);

	177 const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor;

	178 DCHECK_LE(bucket.backtrace_cursor, arraysize(context.backtrace.frames));

	179

	180 HeapEntry entry;

	181 entry.stack_frame_id =

	182 stack_frame_deduplicator->Insert(backtrace_begin, backtrace_end);

	183

	184 // Deduplicate the type name, or use ID -1 if type name is not set.

	185 entry.type_id = bucket.is_broken_down_by_type_name

	186 ? type_name_deduplicator->Insert(context.type_name)

	187 : -1;

	188

	189 entry.size = bucket.size;

	190

	191 auto position_and_inserted = entries->insert(entry);

	192 return position_and_inserted.second;

	193 }

	194

	195 // Recursively breaks down \|bucket\| into smaller buckets, and adds the buckets

	196 // of significant size to \|entries\|.

	197 void BreakDown(const Bucket& bucket,

	198 std::set<HeapEntry>* entries,

	199 StackFrameDeduplicator* stack_frame_deduplicator,

	200 TypeNameDeduplicator* type_name_deduplicator) {

	201 std::vector<Bucket> by_backtrace =

	202 DiscardSmallBuckets(BreakDownBy(bucket, BreakDownMode::kByBacktrace));

	203 std::vector<Bucket> by_type_name =

	204 DiscardSmallBuckets(BreakDownBy(bucket, BreakDownMode::kByTypeName));

	205

	206 // Insert entries for the buckets. If a bucket was not present before, it has

	207 // not been broken down before, so recursively continue breaking down in that

	208 // case. There might be multiple routes to the same entry (first break down

	209 // by type name, then by backtrace, or first by backtrace and then by type),

	210 // so a set is used to avoid dumping and breaking down entries more than once.

	211

	212 for (const Bucket& subbucket : by_backtrace)

	213 if (InsertEntry(subbucket, entries, stack_frame_deduplicator,

	214 type_name_deduplicator))

	215 BreakDown(subbucket, entries, stack_frame_deduplicator,

	216 type_name_deduplicator);

	217

	218 for (const Bucket& subbucket : by_type_name)

	219 if (InsertEntry(subbucket, entries, stack_frame_deduplicator,

	220 type_name_deduplicator))

	221 BreakDown(subbucket, entries, stack_frame_deduplicator,

	222 type_name_deduplicator);

39 }	223 }

40	224

41 } // namespace	225 } // namespace

42	226

43 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,	227 bool operator<(HeapEntry lhs, HeapEntry rhs) {

44 TypeNameDeduplicator* type_name_deduplicator)	228 // There is no need to compare \|size\|. If the backtrace and type name are

45 : traced_value_(new TracedValue()),	229 // equal then the sizes must be equal as well.

46 stack_frame_deduplicator_(stack_frame_deduplicator),	230 return std::tie(lhs.stack_frame_id, lhs.type_id) <

47 type_name_deduplicator_(type_name_deduplicator) {}	231 std::tie(rhs.stack_frame_id, rhs.type_id);

48	232 }

49 HeapDumpWriter::~HeapDumpWriter() {}	233

50	234 std::set<HeapEntry> Dump(

51 void HeapDumpWriter::InsertAllocation(const AllocationContext& context,	235 const base::hash_map<AllocationContext, size_t>& allocations,

52 size_t size) {	236 StackFrameDeduplicator* stack_frame_deduplicator,

53 bytes_by_context_[context] += size;	237 TypeNameDeduplicator* type_name_deduplicator) {

54 }	238 // Start with one bucket that represents the entire heap. Iterate by

55	239 // reference, because the pairs are going to point to allocation contexts

56 scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() {	240 // stored in \|allocations\|.

57 // Group by backtrace and by type ID, and compute the total heap size while	241 Bucket root_bucket;

58 // iterating anyway.	242 for (const auto& context_and_size : allocations) {

59 size_t total_size = 0;	243 const AllocationContext* context = &context_and_size.first;

60 hash_map<Backtrace, size_t> bytes_by_backtrace;	244 const size_t size = context_and_size.second;

61 hash_map<const char*, size_t> bytes_by_type;	245 root_bucket.allocations.push_back(std::make_pair(context, size));

62	246 root_bucket.size += size;

63 for (auto context_size : bytes_by_context_) {

64 total_size += context_size.second;

65 bytes_by_backtrace[context_size.first.backtrace] += context_size.second;

66 bytes_by_type[context_size.first.type_name] += context_size.second;

67 }	247 }

68	248

69 auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace);	249 std::set<HeapEntry> entries;

70 auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type);	250 InsertEntry(root_bucket, &entries, stack_frame_deduplicator,

71	251 type_name_deduplicator);

72 traced_value_->BeginArray("entries");	252

73	253 // Recursively break down the heap and fill \|entries\| with entries to dump.

74 // The global size, no column specified.	254 BreakDown(root_bucket, &entries, stack_frame_deduplicator,

75 {	255 type_name_deduplicator);

76 traced_value_->BeginDictionary();	256

77 WriteSize(total_size);	257 return entries;

78 traced_value_->EndDictionary();	258 }

	259

	260 scoped_refptr<TracedValue> Write(const std::set<HeapEntry>& entries) {

	261 std::string buffer;

	262 scoped_refptr<TracedValue> traced_value = new TracedValue;

	263

	264 traced_value->BeginArray("entries");

	265

	266 for (const HeapEntry& entry : entries) {

	267 traced_value->BeginDictionary();

	268

	269 // Format size as hexadecimal string into \|buffer\|.

	270 SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));

	271 traced_value->SetString("size", buffer);

	272

	273 if (entry.stack_frame_id == -1) {

	274 // An empty backtrace (which will have ID -1) is represented by the empty

	275 // string, because there is no leaf frame to reference in \|stackFrames\|.

	276 traced_value->SetString("bt", "");

	277 } else {

	278 // Format index of the leaf frame as a string, because \|stackFrames\| is a

	279 // dictionary, not an array.

	280 SStringPrintf(&buffer, "%i", entry.stack_frame_id);

	281 traced_value->SetString("bt", buffer);

	282 }

	283

	284 // Type ID -1 (cumulative size for all types) is represented by the absence

	285 // of the "type" key in the dictionary.

	286 if (entry.type_id != -1) {

	287 // Format the type ID as a string.

	288 SStringPrintf(&buffer, "%i", entry.type_id);

	289 traced_value->SetString("type", buffer);

	290 }

	291

	292 traced_value->EndDictionary();

79 }	293 }

80	294

81 // Entries with the size per backtrace.	295 traced_value->EndArray(); // "entries"

82 for (const auto& entry : sorted_bytes_by_backtrace) {	296 return traced_value;

83 traced_value_->BeginDictionary();	297 }

84 // Insert a forward reference to the backtrace that will be written to the	298

85 // \|stackFrames\| dictionary later on.	299 } // namespace heap_dump

86 int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames),	300

87 std::end(entry.first.frames));	301 scoped_refptr<TracedValue> WriteHeapDump(

88 WriteStackFrameIndex(idx);	302 const base::hash_map<AllocationContext, size_t>& allocations,

89 WriteSize(entry.second);	303 StackFrameDeduplicator* stack_frame_deduplicator,

90 traced_value_->EndDictionary();	304 TypeNameDeduplicator* type_name_deduplicator) {

91 }	305 return heap_dump::Write(heap_dump::Dump(allocations, stack_frame_deduplicator,

92	306 type_name_deduplicator));

93 // Entries with the size per type.

94 for (const auto& entry : sorted_bytes_by_type) {

95 traced_value_->BeginDictionary();

96 // Insert a forward reference to the type name that will be written to the

97 // trace when it is flushed.

98 WriteTypeId(type_name_deduplicator_->Insert(entry.first));

99 WriteSize(entry.second);

100 traced_value_->EndDictionary();

101 }

102

103 traced_value_->EndArray(); // "entries"

104

105 return traced_value_;

106 }

107

108 void HeapDumpWriter::WriteStackFrameIndex(int index) {

109 if (index == -1) {

110 // An empty backtrace (which will have index -1) is represented by the empty

111 // string, because there is no leaf frame to reference in \|stackFrames\|.

112 traced_value_->SetString("bt", "");

113 } else {

114 // Format index of the leaf frame as a string, because \|stackFrames\| is a

115 // dictionary, not an array.

116 SStringPrintf(&buffer_, "%i", index);

117 traced_value_->SetString("bt", buffer_);

118 }

119 }

120

121 void HeapDumpWriter::WriteTypeId(int type_id) {

122 // Format the type ID as a string.

123 SStringPrintf(&buffer_, "%i", type_id);

124 traced_value_->SetString("type", buffer_);

125 }

126

127 void HeapDumpWriter::WriteSize(size_t size) {

128 // Format size as hexadecimal string into \|buffer_\|.

129 SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size));

130 traced_value_->SetString("size", buffer_);

131 }	307 }

132	308

133 } // namespace trace_event	309 } // namespace trace_event

134 } // namespace base	310 } // namespace base

OLD	NEW

« no previous file with comments | « base/trace_event/heap_profiler_heap_dump_writer.h ('k') | base/trace_event/heap_profiler_heap_dump_writer_unittest.cc » ('j') | no next file with comments »