Index: base/trace_event/memory_profiler_allocation_register.h |
diff --git a/base/trace_event/memory_profiler_allocation_register.h b/base/trace_event/memory_profiler_allocation_register.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..51d213302fb1e88ef84c4782b26758b640759d33 |
--- /dev/null |
+++ b/base/trace_event/memory_profiler_allocation_register.h |
@@ -0,0 +1,336 @@ |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_ |
+#define BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_ |
+ |
+#include <stdint.h> |
+ |
+#include "base/logging.h" |
+#include "base/trace_event/memory_profiler_allocation_context.h" |
+ |
+namespace base { |
+namespace trace_event { |
+ |
+// Returns the size of a page in bytes. |
+size_t BASE_EXPORT GetSystemPageSize(); |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Just use GetPageSize() from base/process_metrics.h
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
+ |
+size_t RoundUpToPageSize(size_t size); |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Just use bits::Align(size, base::GetPageSize()) wh
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
+ |
+// Whether to add guard pages around requested virtual address space. |
+enum class VirtualMemoryGuard { kNone, kGuardPageAfter }; |
+ |
+// Allocates a region of virtual address space of |min_size| rounded up to the |
+// system page size. The memory is zeroed by the system. |
+void* AllocateVirtualMemory(size_t min_size, VirtualMemoryGuard guard); |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
I think these (Alloc+free) should be private stati
Ruud van Asseldonk
2015/10/06 09:26:44
This is not possible if |AllocationlessHashTable|
|
+ |
+// Frees a region of virtual address space allocated by a call to |
+// |AllocateVirtualMemory|. |
+void FreeVirtualMemory(void* address, |
+ size_t allocated_min_size, |
+ VirtualMemoryGuard allocated_guard); |
+ |
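The implementation of these two helpers lives in the .cc file, which is not part of this hunk. As a rough sketch only, a POSIX-flavoured implementation could look like the following; the error handling is an assumption, and (per the review comments above) later patch sets replace parts of this with base::GetPageSize() and bits::Align(). An anonymous private mmap yields zero-filled pages, and mprotect-ing the trailing page provides the kGuardPageAfter behaviour.

  // Sketch only, not part of the patch. Assumes a POSIX system with
  // mmap/mprotect; the real .cc may differ.
  #include <sys/mman.h>

  void* AllocateVirtualMemory(size_t min_size, VirtualMemoryGuard guard) {
    size_t size = RoundUpToPageSize(min_size);
    size_t map_size = size;
    if (guard == VirtualMemoryGuard::kGuardPageAfter)
      map_size += GetSystemPageSize();

    // Anonymous private mappings are zero-filled by the kernel.
    void* addr = mmap(nullptr, map_size, PROT_READ | PROT_WRITE,
                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    PCHECK(addr != MAP_FAILED);

    if (guard == VirtualMemoryGuard::kGuardPageAfter) {
      // Make the page after the usable region inaccessible, so that running
      // off the end faults instead of corrupting unrelated memory.
      PCHECK(0 == mprotect(static_cast<char*>(addr) + size,
                           GetSystemPageSize(), PROT_NONE));
    }
    return addr;
  }

  void FreeVirtualMemory(void* address,
                         size_t allocated_min_size,
                         VirtualMemoryGuard allocated_guard) {
    size_t size = RoundUpToPageSize(allocated_min_size);
    if (allocated_guard == VirtualMemoryGuard::kGuardPageAfter)
      size += GetSystemPageSize();
    PCHECK(0 == munmap(address, size));
  }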
+// An mmap-backed hash table that stores values indexed by |uintptr_t|. It is |
+// intended to be used as the implementation of the allocation register, and it |
+// is tailored specifically for this use case. The common case is that a value |
+// is inserted and removed after a while; lookup without modifying the table is |
+// not an intended use case. The hash table is implemented as an array of |
+// linked lists. The size of this array is fixed, but it does not limit the |
+// number of values that can be stored. |
+// |
+// Replaying a recording of Chrome's allocations and frees against this hash |
+// table takes about 15% of the time that it takes to replay them against |
+// |std::map|. |
+// |
+// This is a templated class for clarity; it is easier to read when hash table |
+// terminology is not mixed with memory profiler terminology. |
+template <typename Value> |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
As you are going to have only one specialization o
|
+class AllocationlessHashTable { |
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
From [1] "Use the specified order of declarations
|
+ friend class AllocationRegisterTest; |
+ |
+ private: |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Use a using cell_index_type = uint32_t
and the ove
Ruud van Asseldonk
2015/10/06 09:26:44
Great idea, done.
|
+ // The table size, 2^18, approximately 260 000, has been tuned for Chrome's |
+ // typical number of outstanding allocations. (This number varies between |
+ // processes. Most processes have a sustained load of ~30k unfreed |
+  // allocations, but some processes have peaks around 100k-400k allocations.) |
+ // Because of the size of the table, it is likely that every |indices_| |
+ // access and every |cells_| access will incur a cache miss. Microbenchmarks |
+  // suggest that it is worthwhile to use more memory for the table to avoid |
+ // chasing down the linked list, until the size is 2^18. |
+ const uint32_t table_size_ = 0x40000; |
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
I think this should be kNumBuckets (regardless of
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
+ const uint32_t table_size_mask_ = 0x3ffff; |
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
kBucketsMask = kNumBuckets - 1?
Also you might add
Ruud van Asseldonk
2015/10/06 09:26:44
Done. Added a comment instead.
|
+ |
+ // Reserve address space to store at most this number of entries. High |
+ // capacity does not imply high memory usage due to the access pattern. The |
+ // only constraint on the capacity is that on 32-bit systems address space is |
+ // scarce (i.e. reserving 2GiB of address space for the entries is not an |
+ // option). A value of ~3M entries is large enough to handle spikes in the |
+ // number of allocations, and modest enough to require no more than a few |
+  // dozen mebibytes of address space. |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Just s/mebibytes/MiB/?
Ruud van Asseldonk
2015/10/06 09:26:45
Done.
|
+ const uint32_t capacity_ = table_size_ * 10; |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
kNumCells / kNumMaxCells ?
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
+ |
+ // A cell can store a (key, value) pair. Cells are part of a linked list |
+ // via the |next| member. This list is either the list for a particular hash, |
+ // or the free list. All cells are contiguous in memory in one big array. |
+ // Therefore, on 64-bit systems, space can be saved by storing 32-bit indices |
+ // instead of pointers as links. Index 0 is used as the list terminator. |
+ struct Cell { |
+ uintptr_t key; |
+ uint32_t next; |
+ Value value; |
+ }; |
+ |
+ // The array of cells. This array is backed by mmapped memory. Lower indices |
+ // are accessed first, higher indices are only accessed when required. In |
+ // this way, even if a huge amount of address space has been mmapped, only |
+ // the cells that are actually used will be backed by physical memory. |
+ Cell* const cells_; |
+ |
+ // The array of indices into |cells_|. |indices_[Hash(key)]| will contain the |
+ // index of the head of the linked list for |Hash(key)|. A value of 0 |
+ // indicates an empty list. This array is backed by mmapped memory. |
+ uint32_t* const indices_; |
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
This should really be named "buckets"
Ruud van Asseldonk
2015/10/06 09:26:44
I agree. Done.
|
+ |
+ // The head of the free list. This is the index of the cell. A value of 0 |
+ // means that the free list is empty. |
+ uint32_t free_; |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
free_list_
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
+ |
+ // The index of the first cell that has not been used before. If the free |
+ // list is empty and a new cell is needed, the cell at this index is used. |
+ uint32_t fresh_; |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
next_unused_cell_
Also say in the comment "from th
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
+ |
+ uint32_t Hash(uintptr_t key) { |
+ // The multiplicative hashing scheme from [Knuth 1998]. The value of |a| |
+    // has been chosen carefully based on measurements with real-world data |
+ // (addresses recorded from a Chrome trace run). It is the first prime |
+ // after 2^17. For |shift|, 13, 14 and 15 yield good results. These values |
+ // are tuned to the table size of 2^18. Microbenchmarks show that this |
+ // simple scheme outperforms fancy hashes like Murmur3 by 20 to 40 percent. |
+ const uintptr_t a = 131101; |
+ const uintptr_t shift = 14; |
+ const uintptr_t h = (key * a) >> shift; |
+ return static_cast<uint32_t>(h) & table_size_mask_; |
+ } |
+ |
+ // Returns a pointer to the variable that contains or should contain the |
+ // index of the cell that stores the entry for |key|. The pointer may point |
+ // at an element of |indices_| or at the |next| member of an element of |
+ // |cells_|. If the value pointed at is 0, |key| is not in the table. |
+ uint32_t* Lookup(uintptr_t key) { |
+ // The list head is in |indices_| at the hash offset. |
+ uint32_t* idx_ptr = indices_ + Hash(key); |
+ |
+ // Chase down the list until the cell that holds |key| is found, or until |
+ // the list ends. |
+ while (*idx_ptr != 0 && cells_[*idx_ptr].key != key) |
+ idx_ptr = &cells_[*idx_ptr].next; |
+ |
+ return idx_ptr; |
+ } |
+ |
+ // Returns the index of a cell that is not being used to store a value. |
+ uint32_t GetFreeCell() { |
+    // First try to re-use a cell from the free list. |
+ if (free_) { |
+ uint32_t idx = free_; |
+ free_ = cells_[idx].next; |
+ return idx; |
+ } |
+ |
+ // Otherwise pick the next fresh cell. |
+ uint32_t idx = fresh_; |
+ fresh_++; |
+ |
+ // If the hash table has too little capacity (when too little address space |
+ // was reserved for |cells_|), |fresh_| can be an index outside of the |
+ // allocated storage. A guard page is allocated there to crash the program |
+ // in that case. There are alternative solutions: |
+ // - Deal with it, increase capacity by reallocating |cells_|. |
+ // - Refuse to insert and let the caller deal with it. |
+ // Because free cells are re-used before accessing fresh cells with a |
+ // higher index, and because reserving address space without touching it is |
+ // cheap, the simplest solution is to just allocate a humongous chunk of |
+ // address space. |
+ |
+ return idx; |
+ } |
+ |
+ public: |
+ AllocationlessHashTable() |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Why this is inline? Move to the .cc
same for the d
Ruud van Asseldonk
2015/10/06 09:26:44
Because the class is templated.
|
+ // Reserve enough address space to store |capacity_| entries if |
+ // necessary, with a guard page after it to crash the program when |
+ // attempting to store more entries. |
+ : cells_(static_cast<Cell*>( |
+ AllocateVirtualMemory(capacity_ * sizeof(Cell), |
+ VirtualMemoryGuard::kGuardPageAfter))), |
+ |
+ // Allocate memory for the |indices_| array. There is no need for a |
+ // guard page because all indexing is masked with the table size. With |
+ // a table size of 2^18, the array will fill 256 pages of 4 KiB. |
+ indices_(static_cast<uint32_t*>( |
+ AllocateVirtualMemory(table_size_ * sizeof(uint32_t), |
+ VirtualMemoryGuard::kNone))) { |
+ // The free list is empty. The first fresh cell is cell 1, because index 0 |
+ // is used as list terminator. |
+ free_ = 0; |
+ fresh_ = 1; |
+ } |
+ |
+ ~AllocationlessHashTable() { |
+ FreeVirtualMemory(indices_, table_size_ * sizeof(uint32_t), |
+ VirtualMemoryGuard::kNone); |
+ FreeVirtualMemory(cells_, capacity_ * sizeof(Cell), |
+ VirtualMemoryGuard::kGuardPageAfter); |
+ } |
+ |
+ // Inserts the key into the table and returns a pointer to the associated |
+ // value. If the key was present already, a pointer to the current value is |
+ // returned. |key| must not be 0. (This is because 0 is used to mark free |
+ // cells, to allow efficient iteration of the hash table.) The value might |
+ // contain garbage from previous use. |
+ Value* Insert(uintptr_t key) { |
+ DCHECK(key != 0); |
+ |
+ uint32_t* idx_ptr = Lookup(key); |
+ |
+ // If the index is 0, the key is not yet present, so insert it. |
+ if (*idx_ptr == 0) { |
+ *idx_ptr = GetFreeCell(); |
+ |
+ cells_[*idx_ptr].key = key; |
+ cells_[*idx_ptr].next = 0; |
+ } |
+ |
+ return &cells_[*idx_ptr].value; |
+ } |
+ |
+  // Removes the key from the table if it is present. It is ok to call this |
+  // with 0 as the key. |
+ void Remove(uintptr_t key) { |
+ uint32_t* idx_ptr = Lookup(key); |
+ |
+ // If the index is 0, the key was not there in the first place. |
+ if (*idx_ptr == 0) |
+ return; |
+ |
+ // The cell at the index is now free, remove it from the linked list for |
+ // |Hash(key)|. |
+ uint32_t free_idx = *idx_ptr; |
+ *idx_ptr = cells_[free_idx].next; |
+ |
+ // Put the free cell at the front of the free list. |
+ cells_[free_idx].next = free_; |
+ free_ = free_idx; |
+ |
+ // Reset the key, so that on iteration the free cell is ignored. |
+ cells_[free_idx].key = 0; |
+ } |
+ |
+ // An iterator that iterates entries in the hash table efficiently, but in no |
+ // particular order. It can do this by iterating the cells and ignoring the |
+ // linked lists altogether. Instead of checking whether a cell is in the free |
+ // list to see if it should be skipped, a key value of 0 is used to indicate |
+ // that a cell is free. |
+ class ConstIterator { |
+ friend class AllocationlessHashTable<Value>; |
+ |
+ private: |
+ const AllocationlessHashTable<Value>& table_; |
+ uint32_t index_; |
+ |
+ ConstIterator(const AllocationlessHashTable<Value>& table, uint32_t index) |
+ : table_(table), index_(index) {} |
+ |
+ public: |
+ void operator++() { |
+ // Find the next cell with a nonzero key until all cells that could |
+ // possibly be used have been iterated. Key 0 indicates a free cell. |
+ do |
+ index_++; |
+ while (index_ < table_.fresh_ && table_.cells_[index_].key == 0); |
+    } |
+ |
+ bool operator!=(const ConstIterator& other) const { |
+ return index_ != other.index_; |
+ } |
+ |
+ std::pair<uintptr_t, const Value*> operator*() const { |
+ return std::make_pair(table_.cells_[index_].key, |
+ &table_.cells_[index_].value); |
+ } |
+ }; |
+ |
+ ConstIterator begin() const { |
+ // Initialize the iterator's index to 0. Cell 0 never stores an entry. |
+ ConstIterator iterator(*this, 0); |
+ // Incrementing will advance the iterator to the first used cell. |
+ ++iterator; |
+ return iterator; |
+ } |
+ |
+ ConstIterator end() const { |
+ // Cell |fresh_ - 1| is the last cell that could contain an entry, so |
+ // index |fresh_| is an iterator past the last element, in line with the |
+ // STL iterator conventions. |
+ return ConstIterator(*this, fresh_); |
+ } |
+}; |
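To make the intended calling pattern concrete, here is a minimal usage sketch (illustrative only, not part of the patch) with a plain size_t as the value type; the key and the UseAllocation helper are made up:

  AllocationlessHashTable<size_t> table;

  // Insert returns a pointer to the value slot for the key; the caller writes
  // through it. The value may hold garbage from a previous use. Key 0 is
  // reserved as the list terminator and must not be inserted.
  size_t* size = table.Insert(0x600dca11);
  *size = 32;

  // Iteration visits live entries in no particular order.
  for (std::pair<uintptr_t, const size_t*> entry : table)
    UseAllocation(entry.first, *entry.second);  // Hypothetical helper.

  // Removing an absent key (or key 0) is a no-op.
  table.Remove(0x600dca11);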
+ |
+// The allocation register keeps track of all allocations that have not been |
+// freed. It is a thin wrapper around |AllocationlessHashTable|, so it does not |
+// call |malloc|. |
+class BASE_EXPORT AllocationRegister { |
+ friend class AllocationRegisterTest; |
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
move this under private:
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
+ |
+ private: |
+ // The value type for the (key, value) pairs stored in the hash table. |
+ struct Allocation { |
+ size_t size; |
+ AllocationContext context; |
+ }; |
+ |
+ AllocationlessHashTable<Allocation> allocations_; |
+ |
+ public: |
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
From [1] "Use the specified order of declarations
Ruud van Asseldonk
2015/10/06 09:26:44
Not possible in this case, but I think it can be d
|
+ AllocationRegister(); |
+ ~AllocationRegister(); |
+ |
+ // Adds an allocation to the table of non-freed allocations. |
+ void Insert(void* address, size_t size, AllocationContext context); |
+ |
+ // Removes an allocation from the table of non-freed allocations. |
+ void Remove(void* address); |
+ |
+ // The type that an iterator dereferences to. |
+ struct AllocationRef { |
+ void* address; |
+ size_t size; |
+ const AllocationContext* context; |
+ }; |
+ |
+ class ConstIterator { |
+ friend class AllocationRegister; |
+ |
+ private: |
+ AllocationlessHashTable<Allocation>::ConstIterator iterator_; |
+ ConstIterator(AllocationlessHashTable<Allocation>::ConstIterator iterator) |
+ : iterator_(iterator) {} |
+ |
+ public: |
+ void operator++() { ++iterator_; } |
+ bool operator!=(const ConstIterator& other) const { |
+ return iterator_ != other.iterator_; |
+    } |
+ |
+ AllocationRef operator*() const; |
+ }; |
+ |
+ ConstIterator begin() const { return ConstIterator(allocations_.begin()); } |
+ ConstIterator end() const { return ConstIterator(allocations_.end()); } |
+}; |
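Illustratively (not part of the patch), the register is meant to be driven from allocator hooks and walked when a heap dump is taken. The hook and helper names below are hypothetical, and the AllocationContext value is assumed to come from the capture code in memory_profiler_allocation_context.h:

  AllocationRegister g_allocation_register;  // Owned by the profiler in reality.

  void OnAllocation(void* address, size_t size, AllocationContext context) {
    g_allocation_register.Insert(address, size, context);
  }

  void OnFree(void* address) {
    g_allocation_register.Remove(address);
  }

  size_t CountOutstandingBytes() {
    size_t total = 0;
    for (AllocationRegister::AllocationRef ref : g_allocation_register)
      total += ref.size;
    return total;
  }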
+ |
+} // namespace trace_event |
+} // namespace base |
+ |
+#endif // BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_ |