base/trace_event/heap_profiler_heap_dump_writer.cc - Issue 2650863003: [tracing] Switch to new heap dump format.

Side by Side Diff: base/trace_event/heap_profiler_heap_dump_writer.cc

Issue 2650863003: [tracing] Switch to new heap dump format. (Closed)

Patch Set: Fix StringDeduplicator::Insert Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« base/trace_event/heap_profiler_event_writer.cc ('K') | « base/trace_event/heap_profiler_heap_dump_writer.h ('k') | base/trace_event/heap_profiler_heap_dump_writer_unittest.cc » ('j') | base/trace_event/heap_profiler_stack_frame_deduplicator.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "base/trace_event/heap_profiler_heap_dump_writer.h"

6

7 #include <stdint.h>

8

9 #include <algorithm>

10 #include <iterator>

11 #include <tuple>

12 #include <utility>

13 #include <vector>

14

15 #include "base/format_macros.h"

16 #include "base/logging.h"

17 #include "base/macros.h"

18 #include "base/strings/stringprintf.h"

19 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"

20 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"

21 #include "base/trace_event/memory_dump_session_state.h"

22 #include "base/trace_event/trace_config.h"

23 #include "base/trace_event/trace_event_argument.h"

24 #include "base/trace_event/trace_log.h"

25

26 // Most of what the \|HeapDumpWriter\| does is aggregating detailed information

27 // about the heap and deciding what to dump. The Input to this process is a list

28 // of \|AllocationContext\|s and size pairs.

29 //

30 // The pairs are grouped into \|Bucket\|s. A bucket is a group of (context, size)

31 // pairs where the properties of the contexts share a prefix. (Type name is

32 // considered a list of length one here.) First all pairs are put into one

33 // bucket that represents the entire heap. Then this bucket is recursively

34 // broken down into smaller buckets. Each bucket keeps track of whether further

35 // breakdown is possible.

36

37 namespace base {

38 namespace trace_event {

39 namespace internal {

40 namespace {

41

42 // Denotes a property of \|AllocationContext\| to break down by.

43 enum class BreakDownMode { kByBacktrace, kByTypeName };

44

45 // A group of bytes for which the context shares a prefix.

46 struct Bucket {

47 Bucket()

48 : size(0),

49 count(0),

50 backtrace_cursor(0),

51 is_broken_down_by_type_name(false) {}

52

53 std::vector<std::pair<const AllocationContext*, AllocationMetrics>>

54 metrics_by_context;

55

56 // The sum of the sizes of \|metrics_by_context\|.

57 size_t size;

58

59 // The sum of number of allocations of \|metrics_by_context\|.

60 size_t count;

61

62 // The index of the stack frame that has not yet been broken down by. For all

63 // elements in this bucket, the stack frames 0 up to (but not including) the

64 // cursor, must be equal.

65 size_t backtrace_cursor;

66

67 // When true, the type name for all elements in this bucket must be equal.

68 bool is_broken_down_by_type_name;

69 };

70

71 // Comparison operator to order buckets by their size.

72 bool operator<(const Bucket& lhs, const Bucket& rhs) {

73 return lhs.size < rhs.size;

74 }

75

76 // Groups the allocations in the bucket by \|break_by\|. The buckets in the

77 // returned list will have \|backtrace_cursor\| advanced or

78 // \|is_broken_down_by_type_name\| set depending on the property to group by.

79 std::vector<Bucket> GetSubbuckets(const Bucket& bucket,

80 BreakDownMode break_by) {

81 base::hash_map<const void*, Bucket> breakdown;

82

83

84 if (break_by == BreakDownMode::kByBacktrace) {

85 for (const auto& context_and_metrics : bucket.metrics_by_context) {

86 const Backtrace& backtrace = context_and_metrics.first->backtrace;

87 const StackFrame* begin = std::begin(backtrace.frames);

88 const StackFrame* end = begin + backtrace.frame_count;

89 const StackFrame* cursor = begin + bucket.backtrace_cursor;

90

91 DCHECK_LE(cursor, end);

92

93 if (cursor != end) {

94 Bucket& subbucket = breakdown[cursor->value];

95 subbucket.size += context_and_metrics.second.size;

96 subbucket.count += context_and_metrics.second.count;

97 subbucket.metrics_by_context.push_back(context_and_metrics);

98 subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;

99 subbucket.is_broken_down_by_type_name =

100 bucket.is_broken_down_by_type_name;

101 DCHECK_GT(subbucket.size, 0u);

102 DCHECK_GT(subbucket.count, 0u);

103 }

104 }

105 } else if (break_by == BreakDownMode::kByTypeName) {

106 if (!bucket.is_broken_down_by_type_name) {

107 for (const auto& context_and_metrics : bucket.metrics_by_context) {

108 const AllocationContext* context = context_and_metrics.first;

109 Bucket& subbucket = breakdown[context->type_name];

110 subbucket.size += context_and_metrics.second.size;

111 subbucket.count += context_and_metrics.second.count;

112 subbucket.metrics_by_context.push_back(context_and_metrics);

113 subbucket.backtrace_cursor = bucket.backtrace_cursor;

114 subbucket.is_broken_down_by_type_name = true;

115 DCHECK_GT(subbucket.size, 0u);

116 DCHECK_GT(subbucket.count, 0u);

117 }

118 }

119 }

120

121 std::vector<Bucket> buckets;

122 buckets.reserve(breakdown.size());

123 for (auto key_bucket : breakdown)

124 buckets.push_back(key_bucket.second);

125

126 return buckets;

127 }

128

129 // Breaks down the bucket by \|break_by\|. Returns only buckets that contribute

130 // more than \|min_size_bytes\| to the total size. The long tail is omitted.

131 std::vector<Bucket> BreakDownBy(const Bucket& bucket,

132 BreakDownMode break_by,

133 size_t min_size_bytes) {

134 std::vector<Bucket> buckets = GetSubbuckets(bucket, break_by);

135

136 // Ensure that \|buckets\| is a max-heap (the data structure, not memory heap),

137 // so its front contains the largest bucket. Buckets should be iterated

138 // ordered by size, but sorting the vector is overkill because the long tail

139 // of small buckets will be discarded. By using a max-heap, the optimal case

140 // where all but the first bucket are discarded is O(n). The worst case where

141 // no bucket is discarded is doing a heap sort, which is O(n log n).

142 std::make_heap(buckets.begin(), buckets.end());

143

144 // Keep including buckets until adding one would increase the number of

145 // bytes accounted for by \|min_size_bytes\|. The large buckets end up in

146 // [it, end()), [begin(), it) is the part that contains the max-heap

147 // of small buckets.

148 std::vector<Bucket>::iterator it;

149 for (it = buckets.end(); it != buckets.begin(); --it) {

150 if (buckets.front().size < min_size_bytes)

151 break;

152

153 // Put the largest bucket in [begin, it) at \|it - 1\| and max-heapify

154 // [begin, it - 1). This puts the next largest bucket at \|buckets.front()\|.

155 std::pop_heap(buckets.begin(), it);

156 }

157

158 // At this point, \|buckets\| looks like this (numbers are bucket sizes):

159 //

160 // <-- max-heap of small buckets --->

161 // <-- large buckets by ascending size -->

162 // [ 19 \| 11 \| 13 \| 7 \| 2 \| 5 \| ... \| 83 \| 89 \| 97 ]

163 // ^ ^ ^

164 // \| \| \|

165 // begin() it end()

166

167 // Discard the long tail of buckets that contribute less than a percent.

168 buckets.erase(buckets.begin(), it);

169

170 return buckets;

171 }

172

173 } // namespace

174

175 bool operator<(Entry lhs, Entry rhs) {

176 // There is no need to compare \|size\|. If the backtrace and type name are

177 // equal then the sizes must be equal as well.

178 return std::tie(lhs.stack_frame_id, lhs.type_id) <

179 std::tie(rhs.stack_frame_id, rhs.type_id);

180 }

181

182 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,

183 TypeNameDeduplicator* type_name_deduplicator,

184 uint32_t breakdown_threshold_bytes)

185 : stack_frame_deduplicator_(stack_frame_deduplicator),

186 type_name_deduplicator_(type_name_deduplicator),

187 breakdown_threshold_bytes_(breakdown_threshold_bytes) {

188 }

189

190 HeapDumpWriter::~HeapDumpWriter() {}

191

192 bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {

193 // The contexts in the bucket are all different, but the [begin, cursor) range

194 // is equal for all contexts in the bucket, and the type names are the same if

195 // \|is_broken_down_by_type_name\| is set.

196 DCHECK(!bucket.metrics_by_context.empty());

197

198 const AllocationContext* context = bucket.metrics_by_context.front().first;

199

200 const StackFrame* backtrace_begin = std::begin(context->backtrace.frames);

201 const StackFrame* backtrace_end = backtrace_begin + bucket.backtrace_cursor;

202 DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

203

204 Entry entry;

205 entry.stack_frame_id = stack_frame_deduplicator_->Insert(

206 backtrace_begin, backtrace_end);

207

208 // Deduplicate the type name, or use ID -1 if type name is not set.

209 entry.type_id = bucket.is_broken_down_by_type_name

210 ? type_name_deduplicator_->Insert(context->type_name)

211 : -1;

212

213 entry.size = bucket.size;

214 entry.count = bucket.count;

215

216 auto position_and_inserted = entries_.insert(entry);

217 return position_and_inserted.second;

218 }

219

220 void HeapDumpWriter::BreakDown(const Bucket& bucket) {

221 auto by_backtrace = BreakDownBy(bucket,

222 BreakDownMode::kByBacktrace,

223 breakdown_threshold_bytes_);

224 auto by_type_name = BreakDownBy(bucket,

225 BreakDownMode::kByTypeName,

226 breakdown_threshold_bytes_);

227

228 // Insert entries for the buckets. If a bucket was not present before, it has

229 // not been broken down before, so recursively continue breaking down in that

230 // case. There might be multiple routes to the same entry (first break down

231 // by type name, then by backtrace, or first by backtrace and then by type),

232 // so a set is used to avoid dumping and breaking down entries more than once.

233

234 for (const Bucket& subbucket : by_backtrace)

235 if (AddEntryForBucket(subbucket))

236 BreakDown(subbucket);

237

238 for (const Bucket& subbucket : by_type_name)

239 if (AddEntryForBucket(subbucket))

240 BreakDown(subbucket);

241 }

242

243 const std::set<Entry>& HeapDumpWriter::Summarize(

244 const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context) {

245 // Start with one bucket that represents the entire heap. Iterate by

246 // reference, because the allocation contexts are going to point to allocation

247 // contexts stored in \|metrics_by_context\|.

248 Bucket root_bucket;

249 for (const auto& context_and_metrics : metrics_by_context) {

250 DCHECK_GT(context_and_metrics.second.size, 0u);

251 DCHECK_GT(context_and_metrics.second.count, 0u);

252 const AllocationContext* context = &context_and_metrics.first;

253 root_bucket.metrics_by_context.push_back(

254 std::make_pair(context, context_and_metrics.second));

255 root_bucket.size += context_and_metrics.second.size;

256 root_bucket.count += context_and_metrics.second.count;

257 }

258

259 AddEntryForBucket(root_bucket);

260

261 // Recursively break down the heap and fill \|entries_\| with entries to dump.

262 BreakDown(root_bucket);

263

264 return entries_;

265 }

266

267 std::unique_ptr<TracedValue> Serialize(const std::set<Entry>& entries) {

268 std::string buffer;

269 std::unique_ptr<TracedValue> traced_value(new TracedValue);

270

271 traced_value->BeginArray("entries");

272

273 for (const Entry& entry : entries) {

274 traced_value->BeginDictionary();

275

276 // Format size as hexadecimal string into \|buffer\|.

277 SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));

278 traced_value->SetString("size", buffer);

279

280 SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.count));

281 traced_value->SetString("count", buffer);

282

283 if (entry.stack_frame_id == -1) {

284 // An empty backtrace (which will have ID -1) is represented by the empty

285 // string, because there is no leaf frame to reference in \|stackFrames\|.

286 traced_value->SetString("bt", "");

287 } else {

288 // Format index of the leaf frame as a string, because \|stackFrames\| is a

289 // dictionary, not an array.

290 SStringPrintf(&buffer, "%i", entry.stack_frame_id);

291 traced_value->SetString("bt", buffer);

292 }

293

294 // Type ID -1 (cumulative size for all types) is represented by the absence

295 // of the "type" key in the dictionary.

296 if (entry.type_id != -1) {

297 // Format the type ID as a string.

298 SStringPrintf(&buffer, "%i", entry.type_id);

299 traced_value->SetString("type", buffer);

300 }

301

302 traced_value->EndDictionary();

303 }

304

305 traced_value->EndArray(); // "entries"

306 return traced_value;

307 }

308

309 } // namespace internal

310

311 std::unique_ptr<TracedValue> ExportHeapDump(

312 const hash_map<AllocationContext, AllocationMetrics>& metrics_by_context,

313 const MemoryDumpSessionState& session_state) {

314 internal::HeapDumpWriter writer(

315 session_state.stack_frame_deduplicator(),

316 session_state.type_name_deduplicator(),

317 session_state.heap_profiler_breakdown_threshold_bytes());

318 return Serialize(writer.Summarize(metrics_by_context));

319 }

320

321 } // namespace trace_event

322 } // namespace base

OLD	NEW