Chromium Code Reviews

Unified Diff: base/trace_event/memory_profiler_allocation_register.h

Issue 1371053002: [Tracing] Add allocation register for heap profiling (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@backtrace
Patch Set: Created 5 years, 3 months ago
Index: base/trace_event/memory_profiler_allocation_register.h
diff --git a/base/trace_event/memory_profiler_allocation_register.h b/base/trace_event/memory_profiler_allocation_register.h
new file mode 100644
index 0000000000000000000000000000000000000000..51d213302fb1e88ef84c4782b26758b640759d33
--- /dev/null
+++ b/base/trace_event/memory_profiler_allocation_register.h
@@ -0,0 +1,336 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_
+#define BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_
+
+#include <stdint.h>
+
+#include <utility>
+
+#include "base/base_export.h"
+#include "base/logging.h"
+#include "base/trace_event/memory_profiler_allocation_context.h"
+
+namespace base {
+namespace trace_event {
+
+// Returns the size of a page in bytes.
+BASE_EXPORT size_t GetSystemPageSize();
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 Just use GetPageSize() from base/process_metrics.h
Ruud van Asseldonk 2015/10/06 09:26:44 Done.
+
+// Rounds |size| up to a multiple of the system page size.
+size_t RoundUpToPageSize(size_t size);
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 Just use bits::Align(size, base::GetPageSize()) wh
Ruud van Asseldonk 2015/10/06 09:26:44 Done.
+
+// Whether to place a guard page after the requested virtual address space.
+enum class VirtualMemoryGuard { kNone, kGuardPageAfter };
+
+// Allocates a region of virtual address space of |min_size| rounded up to the
+// system page size. The memory is zeroed by the system.
+void* AllocateVirtualMemory(size_t min_size, VirtualMemoryGuard guard);
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 I think these (Alloc+free) should be private stati
Ruud van Asseldonk 2015/10/06 09:26:44 This is not possible if |AllocationlessHashTable|
+
+// Frees a region of virtual address space allocated by a call to
+// |AllocateVirtualMemory|.
+void FreeVirtualMemory(void* address,
+ size_t allocated_min_size,
+ VirtualMemoryGuard allocated_guard);
+
+// An mmap-backed hash table that stores values indexed by |uintptr_t|. It is
+// intended to be used as the implementation of the allocation register, and it
+// is tailored specifically for this use case. The common case is that a value
+// is inserted and removed after a while; lookups that do not modify the table
+// are not an intended use case. The hash table is implemented as an array of
+// linked lists. The size of this array is fixed, but it does not limit the
+// number of values that can be stored.
+//
+// Replaying a recording of Chrome's allocations and frees against this hash
+// table takes about 15% of the time that it takes to replay them against
+// |std::map|.
+//
+// This is a templated class for clarity; the code reads more easily when hash
+// table terminology is not mixed with memory profiler terminology.
+template <typename Value>
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 As you are going to have only one specialization o
+class AllocationlessHashTable {
Primiano Tucci (use gerrit) 2015/10/05 14:11:06 From [1] "Use the specified order of declarations
+ friend class AllocationRegisterTest;
+
+ private:
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 Use a using cell_index_type = uint32_t and the ove
Ruud van Asseldonk 2015/10/06 09:26:44 Great idea, done.
+  // The table size, 2^18 (approximately 260,000), has been tuned for Chrome's
+  // typical number of outstanding allocations. (This number varies between
+  // processes. Most processes have a sustained load of ~30k unfreed
+  // allocations, but some processes have peaks of around 100k-400k
+  // allocations.) Because of the size of the table, it is likely that every
+  // |indices_| access and every |cells_| access will incur a cache miss.
+  // Microbenchmarks suggest that it is worthwhile to use more memory for the
+  // table to avoid chasing down the linked list, up to a size of 2^18.
+ const uint32_t table_size_ = 0x40000;
Primiano Tucci (use gerrit) 2015/10/05 14:11:06 I think this should be kNumBuckets (regardless of
Ruud van Asseldonk 2015/10/06 09:26:44 Done.
+ const uint32_t table_size_mask_ = 0x3ffff;
Primiano Tucci (use gerrit) 2015/10/05 14:11:06 kBucketsMask = kNumBuckets - 1? Also you might add
Ruud van Asseldonk 2015/10/06 09:26:44 Done. Added a comment instead.
+
+ // Reserve address space to store at most this number of entries. High
+ // capacity does not imply high memory usage due to the access pattern. The
+ // only constraint on the capacity is that on 32-bit systems address space is
+ // scarce (i.e. reserving 2GiB of address space for the entries is not an
+ // option). A value of ~3M entries is large enough to handle spikes in the
+ // number of allocations, and modest enough to require no more than a few
+  // dozen mebibytes of address space.
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 Just s/mebibytes/MiB/?
Ruud van Asseldonk 2015/10/06 09:26:45 Done.
+ const uint32_t capacity_ = table_size_ * 10;
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 kNumCells / kNumMaxCells ?
Ruud van Asseldonk 2015/10/06 09:26:44 Done.
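
To make the "few dozen mebibytes" claim concrete, a back-of-the-envelope computation. The 24-byte cell size below is an assumed example; the real size depends on |Value|.

    #include <cstddef>
    #include <cstdint>

    // Illustrative arithmetic only; assumes a 24-byte Cell (8-byte key +
    // 4-byte link + a small value + padding) on a 64-bit system.
    constexpr uint32_t kTableSizeSketch = 1 << 18;               // 262,144 buckets.
    constexpr uint32_t kCapacitySketch = kTableSizeSketch * 10;  // 2,621,440 cells.
    constexpr size_t kIndexBytes = kTableSizeSketch * sizeof(uint32_t);  // 1 MiB.
    constexpr size_t kCellBytes = kCapacitySketch * 24ull;       // 60 MiB reserved.

Only address space is reserved up front; because lower cell indices are used first, pages are committed lazily.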
+
+ // A cell can store a (key, value) pair. Cells are part of a linked list
+ // via the |next| member. This list is either the list for a particular hash,
+ // or the free list. All cells are contiguous in memory in one big array.
+ // Therefore, on 64-bit systems, space can be saved by storing 32-bit indices
+ // instead of pointers as links. Index 0 is used as the list terminator.
+ struct Cell {
+ uintptr_t key;
+ uint32_t next;
+ Value value;
+ };
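
The space saving from 32-bit links shows up in the struct layout. A quick check, assuming a 64-bit ABI and a hypothetical 4-byte value type (the static_asserts below would not hold on 32-bit):

    #include <cstdint>

    // Hypothetical layouts for comparison; not part of this header.
    struct CellIdx { uintptr_t key; uint32_t next; uint32_t value; };
    struct CellPtr { uintptr_t key; CellPtr* next; uint32_t value; };
    static_assert(sizeof(CellIdx) == 16, "8-byte key + 4-byte link + 4-byte value");
    static_assert(sizeof(CellPtr) == 24, "an 8-byte link also forces 4 bytes of padding");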
+
+ // The array of cells. This array is backed by mmapped memory. Lower indices
+ // are accessed first, higher indices are only accessed when required. In
+ // this way, even if a huge amount of address space has been mmapped, only
+ // the cells that are actually used will be backed by physical memory.
+ Cell* const cells_;
+
+ // The array of indices into |cells_|. |indices_[Hash(key)]| will contain the
+ // index of the head of the linked list for |Hash(key)|. A value of 0
+ // indicates an empty list. This array is backed by mmapped memory.
+ uint32_t* const indices_;
Primiano Tucci (use gerrit) 2015/10/05 14:11:06 This should really be named "buckets"
Ruud van Asseldonk 2015/10/06 09:26:44 I agree. Done.
+
+ // The head of the free list. This is the index of the cell. A value of 0
+ // means that the free list is empty.
+ uint32_t free_;
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 free_list_
Ruud van Asseldonk 2015/10/06 09:26:44 Done.
+
+ // The index of the first cell that has not been used before. If the free
+ // list is empty and a new cell is needed, the cell at this index is used.
+ uint32_t fresh_;
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 next_unused_cell_ Also say in the comment "from th
Ruud van Asseldonk 2015/10/06 09:26:44 Done.
+
+ uint32_t Hash(uintptr_t key) {
+ // The multiplicative hashing scheme from [Knuth 1998]. The value of |a|
+    // has been chosen carefully based on measurements with real-world data
+ // (addresses recorded from a Chrome trace run). It is the first prime
+ // after 2^17. For |shift|, 13, 14 and 15 yield good results. These values
+ // are tuned to the table size of 2^18. Microbenchmarks show that this
+ // simple scheme outperforms fancy hashes like Murmur3 by 20 to 40 percent.
+ const uintptr_t a = 131101;
+ const uintptr_t shift = 14;
+ const uintptr_t h = (key * a) >> shift;
+ return static_cast<uint32_t>(h) & table_size_mask_;
+ }
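
As a worked example of the constants above (a standalone copy for experimentation; |HashSketch| is not part of this header):

    #include <cassert>
    #include <cstdint>

    // Same multiplicative hash as above, copied out for a standalone check.
    uint32_t HashSketch(uintptr_t key) {
      return static_cast<uint32_t>((key * 131101) >> 14) & 0x3ffff;
    }

    int main() {
      // Keys 16 bytes apart (e.g. consecutive small allocations) land about
      // (16 * 131101) >> 14 = 128 buckets apart, so nearby addresses spread
      // across the table instead of clustering in one bucket.
      assert(HashSketch(0x1010) - HashSketch(0x1000) == 128);
      return 0;
    }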
+
+ // Returns a pointer to the variable that contains or should contain the
+ // index of the cell that stores the entry for |key|. The pointer may point
+ // at an element of |indices_| or at the |next| member of an element of
+ // |cells_|. If the value pointed at is 0, |key| is not in the table.
+ uint32_t* Lookup(uintptr_t key) {
+ // The list head is in |indices_| at the hash offset.
+ uint32_t* idx_ptr = indices_ + Hash(key);
+
+ // Chase down the list until the cell that holds |key| is found, or until
+ // the list ends.
+ while (*idx_ptr != 0 && cells_[*idx_ptr].key != key)
+ idx_ptr = &cells_[*idx_ptr].next;
+
+ return idx_ptr;
+ }
+
+ // Returns the index of a cell that is not being used to store a value.
+ uint32_t GetFreeCell() {
+ // First try to re-use a cell from the freelist.
+ if (free_) {
+ uint32_t idx = free_;
+ free_ = cells_[idx].next;
+ return idx;
+ }
+
+ // Otherwise pick the next fresh cell.
+ uint32_t idx = fresh_;
+ fresh_++;
+
+ // If the hash table has too little capacity (when too little address space
+ // was reserved for |cells_|), |fresh_| can be an index outside of the
+ // allocated storage. A guard page is allocated there to crash the program
+ // in that case. There are alternative solutions:
+ // - Deal with it, increase capacity by reallocating |cells_|.
+ // - Refuse to insert and let the caller deal with it.
+ // Because free cells are re-used before accessing fresh cells with a
+ // higher index, and because reserving address space without touching it is
+ // cheap, the simplest solution is to just allocate a humongous chunk of
+ // address space.
+
+ return idx;
+ }
+
+ public:
+ AllocationlessHashTable()
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 Why this is inline? Move to the .cc same for the d
Ruud van Asseldonk 2015/10/06 09:26:44 Because the class is templated.
+ // Reserve enough address space to store |capacity_| entries if
+ // necessary, with a guard page after it to crash the program when
+ // attempting to store more entries.
+ : cells_(static_cast<Cell*>(
+ AllocateVirtualMemory(capacity_ * sizeof(Cell),
+ VirtualMemoryGuard::kGuardPageAfter))),
+
+ // Allocate memory for the |indices_| array. There is no need for a
+ // guard page because all indexing is masked with the table size. With
+ // a table size of 2^18, the array will fill 256 pages of 4 KiB.
+ indices_(static_cast<uint32_t*>(
+ AllocateVirtualMemory(table_size_ * sizeof(uint32_t),
+ VirtualMemoryGuard::kNone))) {
+ // The free list is empty. The first fresh cell is cell 1, because index 0
+ // is used as list terminator.
+ free_ = 0;
+ fresh_ = 1;
+ }
+
+ ~AllocationlessHashTable() {
+ FreeVirtualMemory(indices_, table_size_ * sizeof(uint32_t),
+ VirtualMemoryGuard::kNone);
+ FreeVirtualMemory(cells_, capacity_ * sizeof(Cell),
+ VirtualMemoryGuard::kGuardPageAfter);
+ }
+
+ // Inserts the key into the table and returns a pointer to the associated
+ // value. If the key was present already, a pointer to the current value is
+ // returned. |key| must not be 0. (This is because 0 is used to mark free
+ // cells, to allow efficient iteration of the hash table.) The value might
+ // contain garbage from previous use.
+ Value* Insert(uintptr_t key) {
+ DCHECK(key != 0);
+
+ uint32_t* idx_ptr = Lookup(key);
+
+ // If the index is 0, the key is not yet present, so insert it.
+ if (*idx_ptr == 0) {
+ *idx_ptr = GetFreeCell();
+
+ cells_[*idx_ptr].key = key;
+ cells_[*idx_ptr].next = 0;
+ }
+
+ return &cells_[*idx_ptr].value;
+ }
+
+  // Removes the key from the table if it is present. It is OK to call this
+  // with 0 as the key.
+ void Remove(uintptr_t key) {
+ uint32_t* idx_ptr = Lookup(key);
+
+ // If the index is 0, the key was not there in the first place.
+ if (*idx_ptr == 0)
+ return;
+
+ // The cell at the index is now free, remove it from the linked list for
+ // |Hash(key)|.
+ uint32_t free_idx = *idx_ptr;
+ *idx_ptr = cells_[free_idx].next;
+
+ // Put the free cell at the front of the free list.
+ cells_[free_idx].next = free_;
+ free_ = free_idx;
+
+ // Reset the key, so that on iteration the free cell is ignored.
+ cells_[free_idx].key = 0;
+ }
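
A short walkthrough of the free-list mechanics above, assuming this header is included; the uint32_t value type is purely illustrative:

    AllocationlessHashTable<uint32_t> table;
    *table.Insert(0x1000) = 1;  // Stored in fresh cell 1 (cell 0 is reserved).
    *table.Insert(0x2000) = 2;  // Stored in fresh cell 2.
    table.Remove(0x1000);       // Cell 1 becomes the head of the free list.
    *table.Insert(0x3000) = 3;  // Re-uses cell 1 before touching fresh cell 3.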
+
+ // An iterator that iterates entries in the hash table efficiently, but in no
+ // particular order. It can do this by iterating the cells and ignoring the
+ // linked lists altogether. Instead of checking whether a cell is in the free
+ // list to see if it should be skipped, a key value of 0 is used to indicate
+ // that a cell is free.
+ class ConstIterator {
+ friend class AllocationlessHashTable<Value>;
+
+ private:
+ const AllocationlessHashTable<Value>& table_;
+ uint32_t index_;
+
+ ConstIterator(const AllocationlessHashTable<Value>& table, uint32_t index)
+ : table_(table), index_(index) {}
+
+ public:
+ void operator++() {
+ // Find the next cell with a nonzero key until all cells that could
+ // possibly be used have been iterated. Key 0 indicates a free cell.
+ do
+ index_++;
+ while (index_ < table_.fresh_ && table_.cells_[index_].key == 0);
+    }
+
+ bool operator!=(const ConstIterator& other) const {
+ return index_ != other.index_;
+ }
+
+ std::pair<uintptr_t, const Value*> operator*() const {
+ return std::make_pair(table_.cells_[index_].key,
+ &table_.cells_[index_].value);
+ }
+ };
+
+ ConstIterator begin() const {
+ // Initialize the iterator's index to 0. Cell 0 never stores an entry.
+ ConstIterator iterator(*this, 0);
+ // Incrementing will advance the iterator to the first used cell.
+ ++iterator;
+ return iterator;
+ }
+
+ ConstIterator end() const {
+ // Cell |fresh_ - 1| is the last cell that could contain an entry, so
+ // index |fresh_| is an iterator past the last element, in line with the
+ // STL iterator conventions.
+ return ConstIterator(*this, fresh_);
+ }
+};
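
Because |begin()|, |end()|, |operator++|, |operator!=| and |operator*| are all provided, the table works with a range-based for loop. A sketch, again with an illustrative uint32_t value type:

    AllocationlessHashTable<uint32_t> table;
    *table.Insert(0x1000) = 42;
    for (auto entry : table) {
      uintptr_t key = entry.first;           // The inserted key.
      const uint32_t* value = entry.second;  // Points into the cell array.
    }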
+
+// The allocation register keeps track of all allocations that have not been
+// freed. It is a thin wrapper around |AllocationlessHashTable|, so it does not
+// call |malloc|.
+class BASE_EXPORT AllocationRegister {
+ friend class AllocationRegisterTest;
Primiano Tucci (use gerrit) 2015/10/05 14:11:05 move this under private:
Ruud van Asseldonk 2015/10/06 09:26:44 Done.
+
+ private:
+ // The value type for the (key, value) pairs stored in the hash table.
+ struct Allocation {
+ size_t size;
+ AllocationContext context;
+ };
+
+ AllocationlessHashTable<Allocation> allocations_;
+
+ public:
Primiano Tucci (use gerrit) 2015/10/05 14:11:06 From [1] "Use the specified order of declarations
Ruud van Asseldonk 2015/10/06 09:26:44 Not possible in this case, but I think it can be d
+ AllocationRegister();
+ ~AllocationRegister();
+
+ // Adds an allocation to the table of non-freed allocations.
+ void Insert(void* address, size_t size, AllocationContext context);
+
+ // Removes an allocation from the table of non-freed allocations.
+ void Remove(void* address);
+
+ // The type that an iterator dereferences to.
+ struct AllocationRef {
+ void* address;
+ size_t size;
+ const AllocationContext* context;
+ };
+
+ class ConstIterator {
+ friend class AllocationRegister;
+
+ private:
+ AllocationlessHashTable<Allocation>::ConstIterator iterator_;
+ ConstIterator(AllocationlessHashTable<Allocation>::ConstIterator iterator)
+ : iterator_(iterator) {}
+
+ public:
+ void operator++() { ++iterator_; }
+ bool operator!=(const ConstIterator& other) const {
+ return iterator_ != other.iterator_;
+    }
+
+ AllocationRef operator*() const;
+ };
+
+ ConstIterator begin() const { return ConstIterator(allocations_.begin()); }
+ ConstIterator end() const { return ConstIterator(allocations_.end()); }
+};
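
A sketch of how a heap profiler might drive the register from its allocator hooks. The hook functions and |g_register| are hypothetical; only Insert, Remove and iteration come from this header:

    AllocationRegister g_register;

    void OnAllocHook(void* address, size_t size, AllocationContext context) {
      g_register.Insert(address, size, context);
    }

    void OnFreeHook(void* address) {
      g_register.Remove(address);
    }

    size_t TotalLiveBytes(const AllocationRegister& reg) {
      size_t total = 0;
      for (auto allocation : reg)
        total += allocation.size;
      return total;
    }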
+
+} // namespace trace_event
+} // namespace base
+
+#endif // BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_
