Chromium Code Reviews

Side by Side Diff: base/trace_event/heap_profiler_heap_dump_writer.cc

Issue 2650863003: [tracing] Switch to new heap dump format. (Closed)
Patch Set: Rebase. Created 3 years, 6 months ago.
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/trace_event/heap_profiler_heap_dump_writer.h"

#include <stdint.h>

#include <algorithm>
#include <iterator>
#include <set>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "base/format_macros.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/strings/stringprintf.h"
#include "base/trace_event/heap_profiler_serialization_state.h"
#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
#include "base/trace_event/heap_profiler_type_name_deduplicator.h"
#include "base/trace_event/trace_config.h"
#include "base/trace_event/trace_event_argument.h"
#include "base/trace_event/trace_log.h"

// Most of what the |HeapDumpWriter| does is aggregating detailed information
// about the heap and deciding what to dump. The input to this process is a
// list of |AllocationContext|s and size pairs.
//
// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
// pairs where the properties of the contexts share a prefix. (Type name is
// considered a list of length one here.) First all pairs are put into one
// bucket that represents the entire heap. Then this bucket is recursively
// broken down into smaller buckets. Each bucket keeps track of whether further
// breakdown is possible.

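// As a purely illustrative example (frame and type names below are
// hypothetical), suppose the input contains these (backtrace, type name,
// size) triples:
//
//   ([BrowserMain, CreateWidget], "Widget", 100)
//   ([BrowserMain, CreateWidget], "Label",   60)
//   ([BrowserMain, LoadImage],    "Image",  140)
//
// The root bucket covers all 300 bytes. Breaking it down by backtrace yields
// one bucket for BrowserMain (300 bytes, cursor advanced by one frame), which
// in turn breaks down into buckets for CreateWidget (160 bytes) and LoadImage
// (140 bytes). Breaking down by type name splits the CreateWidget bucket into
// "Widget" (100 bytes) and "Label" (60 bytes), and so on, until no further
// breakdown is possible or the resulting buckets fall below the dump
// threshold.
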
namespace base {
namespace trace_event {
namespace internal {
namespace {

// Denotes a property of |AllocationContext| to break down by.
enum class BreakDownMode { kByBacktrace, kByTypeName };

// A group of bytes for which the context shares a prefix.
struct Bucket {
  Bucket()
      : size(0),
        count(0),
        backtrace_cursor(0),
        is_broken_down_by_type_name(false) {}

  std::vector<std::pair<const AllocationContext*, AllocationMetrics>>
      metrics_by_context;

  // The sum of the sizes of |metrics_by_context|.
  size_t size;

  // The sum of number of allocations of |metrics_by_context|.
  size_t count;

  // The index of the first stack frame that has not yet been broken down by.
  // For all elements in this bucket, the stack frames 0 up to (but not
  // including) the cursor must be equal.
  size_t backtrace_cursor;

  // When true, the type name for all elements in this bucket must be equal.
  bool is_broken_down_by_type_name;
};

// Comparison operator to order buckets by their size.
bool operator<(const Bucket& lhs, const Bucket& rhs) {
  return lhs.size < rhs.size;
}

// Groups the allocations in the bucket by |break_by|. The buckets in the
// returned list will have |backtrace_cursor| advanced or
// |is_broken_down_by_type_name| set depending on the property to group by.
std::vector<Bucket> GetSubbuckets(const Bucket& bucket,
                                  BreakDownMode break_by) {
  std::unordered_map<const void*, Bucket> breakdown;

  if (break_by == BreakDownMode::kByBacktrace) {
    for (const auto& context_and_metrics : bucket.metrics_by_context) {
      const Backtrace& backtrace = context_and_metrics.first->backtrace;
      const StackFrame* begin = std::begin(backtrace.frames);
      const StackFrame* end = begin + backtrace.frame_count;
      const StackFrame* cursor = begin + bucket.backtrace_cursor;

      DCHECK_LE(cursor, end);

      if (cursor != end) {
        Bucket& subbucket = breakdown[cursor->value];
        subbucket.size += context_and_metrics.second.size;
        subbucket.count += context_and_metrics.second.count;
        subbucket.metrics_by_context.push_back(context_and_metrics);
        subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
        subbucket.is_broken_down_by_type_name =
            bucket.is_broken_down_by_type_name;
        DCHECK_GT(subbucket.size, 0u);
        DCHECK_GT(subbucket.count, 0u);
      }
    }
  } else if (break_by == BreakDownMode::kByTypeName) {
    if (!bucket.is_broken_down_by_type_name) {
      for (const auto& context_and_metrics : bucket.metrics_by_context) {
        const AllocationContext* context = context_and_metrics.first;
        Bucket& subbucket = breakdown[context->type_name];
        subbucket.size += context_and_metrics.second.size;
        subbucket.count += context_and_metrics.second.count;
        subbucket.metrics_by_context.push_back(context_and_metrics);
        subbucket.backtrace_cursor = bucket.backtrace_cursor;
        subbucket.is_broken_down_by_type_name = true;
        DCHECK_GT(subbucket.size, 0u);
        DCHECK_GT(subbucket.count, 0u);
      }
    }
  }

  std::vector<Bucket> buckets;
  buckets.reserve(breakdown.size());
  for (auto key_bucket : breakdown)
    buckets.push_back(key_bucket.second);

  return buckets;
}

// Breaks down the bucket by |break_by|. Returns only buckets that contribute
// more than |min_size_bytes| to the total size. The long tail is omitted.
std::vector<Bucket> BreakDownBy(const Bucket& bucket,
                                BreakDownMode break_by,
                                size_t min_size_bytes) {
  std::vector<Bucket> buckets = GetSubbuckets(bucket, break_by);

  // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
  // so its front contains the largest bucket. Buckets should be iterated
  // ordered by size, but sorting the vector is overkill because the long tail
  // of small buckets will be discarded. By using a max-heap, the optimal case
  // where all but the first bucket are discarded is O(n). The worst case where
  // no bucket is discarded is doing a heap sort, which is O(n log n).
  std::make_heap(buckets.begin(), buckets.end());

  // Keep including buckets until the next one would add fewer than
  // |min_size_bytes| bytes to the amount accounted for. The large buckets end
  // up in [it, end()); [begin(), it) is the part that still contains the
  // max-heap of small buckets.
  std::vector<Bucket>::iterator it;
  for (it = buckets.end(); it != buckets.begin(); --it) {
    if (buckets.front().size < min_size_bytes)
      break;

    // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
    // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
    std::pop_heap(buckets.begin(), it);
  }

  // At this point, |buckets| looks like this (numbers are bucket sizes):
  //
  // <-- max-heap of small buckets --->
  //                                  <-- large buckets by ascending size -->
  // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
  //   ^                                ^             ^
  //   |                                |             |
  //   begin()                          it         end()
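  //
  // For instance, with the sizes above and a threshold of, say, 80 bytes,
  // only the buckets of size 83, 89 and 97 are kept; the 57 bytes spread over
  // the small buckets are dropped from the breakdown.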

  // Discard the long tail of buckets that each contribute less than
  // |min_size_bytes| bytes.
  buckets.erase(buckets.begin(), it);

  return buckets;
}

}  // namespace

bool operator<(Entry lhs, Entry rhs) {
  // There is no need to compare |size|. If the backtrace and type name are
  // equal then the sizes must be equal as well.
  return std::tie(lhs.stack_frame_id, lhs.type_id) <
         std::tie(rhs.stack_frame_id, rhs.type_id);
}

HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
                               TypeNameDeduplicator* type_name_deduplicator,
                               uint32_t breakdown_threshold_bytes)
    : stack_frame_deduplicator_(stack_frame_deduplicator),
      type_name_deduplicator_(type_name_deduplicator),
      breakdown_threshold_bytes_(breakdown_threshold_bytes) {
}

HeapDumpWriter::~HeapDumpWriter() {}

bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
  // The contexts in the bucket are all different, but the [begin, cursor)
  // range is equal for all contexts in the bucket, and the type names are the
  // same if |is_broken_down_by_type_name| is set.
  DCHECK(!bucket.metrics_by_context.empty());

  const AllocationContext* context = bucket.metrics_by_context.front().first;

  const StackFrame* backtrace_begin = std::begin(context->backtrace.frames);
  const StackFrame* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
  DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));

  Entry entry;
  entry.stack_frame_id = stack_frame_deduplicator_->Insert(
      backtrace_begin, backtrace_end);

  // Deduplicate the type name, or use ID -1 if type name is not set.
  entry.type_id = bucket.is_broken_down_by_type_name
                      ? type_name_deduplicator_->Insert(context->type_name)
                      : -1;

  entry.size = bucket.size;
  entry.count = bucket.count;

  auto position_and_inserted = entries_.insert(entry);
  return position_and_inserted.second;
}

void HeapDumpWriter::BreakDown(const Bucket& bucket) {
  auto by_backtrace = BreakDownBy(bucket,
                                  BreakDownMode::kByBacktrace,
                                  breakdown_threshold_bytes_);
  auto by_type_name = BreakDownBy(bucket,
                                  BreakDownMode::kByTypeName,
                                  breakdown_threshold_bytes_);

  // Insert entries for the buckets. If a bucket was not present before, it has
  // not been broken down before, so recursively continue breaking down in that
  // case. There might be multiple routes to the same entry (first break down
  // by type name, then by backtrace, or first by backtrace and then by type),
  // so a set is used to avoid dumping and breaking down entries more than
  // once.
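  // For example (names are hypothetical), the entry for the backtrace
  // [MessageLoop::Run, Widget::Paint] with type name "Widget" can be reached
  // both by breaking down by backtrace first and by type name first; the
  // |entries_| set ensures it is dumped and recursed into only once.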

  for (const Bucket& subbucket : by_backtrace)
    if (AddEntryForBucket(subbucket))
      BreakDown(subbucket);

  for (const Bucket& subbucket : by_type_name)
    if (AddEntryForBucket(subbucket))
      BreakDown(subbucket);
}

const std::set<Entry>& HeapDumpWriter::Summarize(
    const std::unordered_map<AllocationContext, AllocationMetrics>&
        metrics_by_context) {
  // Start with one bucket that represents the entire heap. Iterate by
  // reference, because the buckets store pointers to the allocation contexts
  // in |metrics_by_context|.
  Bucket root_bucket;
  for (const auto& context_and_metrics : metrics_by_context) {
    DCHECK_GT(context_and_metrics.second.size, 0u);
    DCHECK_GT(context_and_metrics.second.count, 0u);
    const AllocationContext* context = &context_and_metrics.first;
    root_bucket.metrics_by_context.push_back(
        std::make_pair(context, context_and_metrics.second));
    root_bucket.size += context_and_metrics.second.size;
    root_bucket.count += context_and_metrics.second.count;
  }

  AddEntryForBucket(root_bucket);

  // Recursively break down the heap and fill |entries_| with entries to dump.
  BreakDown(root_bucket);

  return entries_;
}

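// Writes |entries| to a new TracedValue as an "entries" array of
// dictionaries. A rough sketch of the output (values are made up; "size" and
// "count" are hexadecimal strings, and "bt" and "type" are keys into the
// dictionaries emitted by the stack frame and type name deduplicators):
//
//   "entries": [
//     { "size": "1f40", "count": "9a", "bt": "" },              // entire heap
//     { "size": "1240", "count": "62", "bt": "5" },             // by backtrace
//     { "size": "c80", "count": "2c", "bt": "5", "type": "3" }  // + type name
//   ]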
std::unique_ptr<TracedValue> Serialize(const std::set<Entry>& entries) {
  std::string buffer;
  std::unique_ptr<TracedValue> traced_value(new TracedValue);

  traced_value->BeginArray("entries");

  for (const Entry& entry : entries) {
    traced_value->BeginDictionary();

    // Format size as hexadecimal string into |buffer|.
    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
    traced_value->SetString("size", buffer);

    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.count));
    traced_value->SetString("count", buffer);

    if (entry.stack_frame_id == -1) {
      // An empty backtrace (which will have ID -1) is represented by the empty
      // string, because there is no leaf frame to reference in |stackFrames|.
      traced_value->SetString("bt", "");
    } else {
      // Format index of the leaf frame as a string, because |stackFrames| is a
      // dictionary, not an array.
      SStringPrintf(&buffer, "%i", entry.stack_frame_id);
      traced_value->SetString("bt", buffer);
    }

    // Type ID -1 (cumulative size for all types) is represented by the absence
    // of the "type" key in the dictionary.
    if (entry.type_id != -1) {
      // Format the type ID as a string.
      SStringPrintf(&buffer, "%i", entry.type_id);
      traced_value->SetString("type", buffer);
    }

    traced_value->EndDictionary();
  }

  traced_value->EndArray();  // "entries"
  return traced_value;
}

}  // namespace internal

std::unique_ptr<TracedValue> ExportHeapDump(
    const std::unordered_map<AllocationContext, AllocationMetrics>&
        metrics_by_context,
    const HeapProfilerSerializationState& heap_profiler_serialization_state) {
  internal::HeapDumpWriter writer(
      heap_profiler_serialization_state.stack_frame_deduplicator(),
      heap_profiler_serialization_state.type_name_deduplicator(),
      heap_profiler_serialization_state
          .heap_profiler_breakdown_threshold_bytes());
  return Serialize(writer.Summarize(metrics_by_context));
}

}  // namespace trace_event
}  // namespace base