| Index: base/trace_event/memory_profiler_allocation_register.h |
| diff --git a/base/trace_event/memory_profiler_allocation_register.h b/base/trace_event/memory_profiler_allocation_register.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..51d213302fb1e88ef84c4782b26758b640759d33 |
| --- /dev/null |
| +++ b/base/trace_event/memory_profiler_allocation_register.h |
| @@ -0,0 +1,336 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_ |
| +#define BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_ |
| + |
| +#include <stdint.h> |
| + |
| +#include <utility> |
| + |
| +#include "base/logging.h" |
| +#include "base/trace_event/memory_profiler_allocation_context.h" |
| + |
| +namespace base { |
| +namespace trace_event { |
| + |
| +// Returns the size of a page in bytes. |
| +size_t BASE_EXPORT GetSystemPageSize(); |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Just use GetPageSize() from base/process_metrics.h
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
| + |
| +size_t RoundUpToPageSize(size_t size); |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Just use bits::Align(size, base::GetPageSize()) wh
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
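Putting the two suggestions above together, a rough sketch of what the replacement could look like (the wrapper name is made up for illustration; only GetPageSize() from base/process_metrics.h and bits::Align() from base/bits.h are taken from the reviewer's comments):

    #include "base/bits.h"
    #include "base/process_metrics.h"

    // Rounds |size| up to a multiple of the system page size. bits::Align
    // requires the alignment to be a power of two, which page sizes are.
    size_t RoundUpToSystemPage(size_t size) {
      return bits::Align(size, base::GetPageSize());
    }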
| + |
| +// Whether to add guard pages around requested virtual address space. |
| +enum class VirtualMemoryGuard { kNone, kGuardPageAfter }; |
| + |
| +// Allocates a region of virtual address space of |min_size| rounded up to the |
| +// system page size. The memory is zeroed by the system. |
| +void* AllocateVirtualMemory(size_t min_size, VirtualMemoryGuard guard); |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
I think these (Alloc+free) should be private stati
Ruud van Asseldonk
2015/10/06 09:26:44
This is not possible if |AllocationlessHashTable|
|
| + |
| +// Frees a region of virtual address space allocated by a call to |
| +// |AllocateVirtualMemory|. |
| +void FreeVirtualMemory(void* address, |
| + size_t allocated_min_size, |
| + VirtualMemoryGuard allocated_guard); |
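These two declarations leave the implementation to the .cc file. Purely as an illustration of the intent, and not the actual Chromium implementation, a minimal POSIX-only sketch using mmap/mprotect, with the trailing page made inaccessible when a guard is requested, might look like this:

    #include <sys/mman.h>

    #include "base/logging.h"

    void* AllocateVirtualMemory(size_t min_size, VirtualMemoryGuard guard) {
      size_t size = RoundUpToPageSize(min_size);
      size_t guard_size =
          guard == VirtualMemoryGuard::kGuardPageAfter ? GetSystemPageSize() : 0;
      // Anonymous private mappings are zeroed by the kernel, and pages are not
      // backed by physical memory until they are first touched.
      void* addr = mmap(nullptr, size + guard_size, PROT_READ | PROT_WRITE,
                        MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
      PCHECK(addr != MAP_FAILED);
      // Make the trailing page inaccessible, so that running off the end of
      // the allocation crashes instead of corrupting unrelated memory.
      if (guard_size != 0) {
        PCHECK(mprotect(static_cast<char*>(addr) + size, guard_size,
                        PROT_NONE) == 0);
      }
      return addr;
    }

    void FreeVirtualMemory(void* address,
                           size_t allocated_min_size,
                           VirtualMemoryGuard allocated_guard) {
      size_t size = RoundUpToPageSize(allocated_min_size);
      if (allocated_guard == VirtualMemoryGuard::kGuardPageAfter)
        size += GetSystemPageSize();
      PCHECK(munmap(address, size) == 0);
    }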
| + |
| +// An mmap-backed hash table that stores values indexed by |uintptr_t|. It is |
| +// intended to be used as the implementation of the allocation register, and it |
| +// is tailored specifically for this use case. The common case is that a value |
| +// is inserted and removed after a while; lookup without modifying the table is |
| +// not an intended use case. The hash table is implemented as an array of |
| +// linked lists. The size of this array is fixed, but it does not limit the |
| +// number of values that can be stored. |
| +// |
| +// Replaying a recording of Chrome's allocations and frees against this hash |
| +// table takes about 15% of the time that it takes to replay them against |
| +// |std::map|. |
| +// |
| +// This is a templated class for clarity; it reads easier when the hash table |
| +// terminology is not mixed with memory profiler terminology. |
| +template <typename Value> |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
As you are going to have only one specialization o
|
| +class AllocationlessHashTable { |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
From [1] "Use the specified order of declarations
|
| + friend class AllocationRegisterTest; |
| + |
| + private: |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Use a using cell_index_type = uint32_t
and the ove
Ruud van Asseldonk
2015/10/06 09:26:44
Great idea, done.
|
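The truncated suggestion above is about introducing a type alias for cell indices; a sketch of how that could look (the surrounding details are assumptions):

    using cell_index_type = uint32_t;

    // Members such as Cell::next, |free_| and |fresh_| would then be declared
    // as cell_index_type instead of a bare uint32_t, documenting that links
    // between cells are indices rather than pointers.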
| + // The table size, 2^18, approximately 260 000, has been tuned for Chrome's |
| + // typical number of outstanding allocations. (This number varies between |
| + // processes. Most processes have a sustained load of ~30k unfreed |
| +// allocations, but some processes have peaks around 100k-400k allocations.) |
| + // Because of the size of the table, it is likely that every |indices_| |
| + // access and every |cells_| access will incur a cache miss. Microbenchmarks |
| +// suggest that it is worthwhile to use more memory for the table to avoid |
| + // chasing down the linked list, until the size is 2^18. |
| + const uint32_t table_size_ = 0x40000; |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
I think this should be kNumBuckets (regardless of
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
| + const uint32_t table_size_mask_ = 0x3ffff; |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
kBucketsMask = kNumBuckets - 1?
Also you might add
Ruud van Asseldonk
2015/10/06 09:26:44
Done. Added a comment instead.
|
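A sketch of the two constants under the names proposed above; the static_assert is an illustrative addition, not something visible in the review:

    // The number of buckets, 2^18, has been tuned for Chrome's typical number
    // of outstanding allocations.
    static const uint32_t kNumBuckets = 0x40000;
    // Mask for reducing a hash to a bucket index; this only works because
    // kNumBuckets is a power of two.
    static const uint32_t kBucketsMask = kNumBuckets - 1;
    static_assert((kNumBuckets & (kNumBuckets - 1)) == 0,
                  "kNumBuckets must be a power of two");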
| + |
| + // Reserve address space to store at most this number of entries. High |
| + // capacity does not imply high memory usage due to the access pattern. The |
| + // only constraint on the capacity is that on 32-bit systems address space is |
| + // scarce (i.e. reserving 2GiB of address space for the entries is not an |
| + // option). A value of ~3M entries is large enough to handle spikes in the |
| +// number of allocations, and modest enough to require no more than a few |
| +// dozen mebibytes of address space. |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Just s/mebibytes/MiB/?
Ruud van Asseldonk
2015/10/06 09:26:45
Done.
|
| + const uint32_t capacity_ = table_size_ * 10; |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
kNumCells / kNumMaxCells ?
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
| + |
| + // A cell can store a (key, value) pair. Cells are part of a linked list |
| + // via the |next| member. This list is either the list for a particular hash, |
| + // or the free list. All cells are contiguous in memory in one big array. |
| + // Therefore, on 64-bit systems, space can be saved by storing 32-bit indices |
| + // instead of pointers as links. Index 0 is used as the list terminator. |
| + struct Cell { |
| + uintptr_t key; |
| + uint32_t next; |
| + Value value; |
| + }; |
| + |
| + // The array of cells. This array is backed by mmapped memory. Lower indices |
| + // are accessed first, higher indices are only accessed when required. In |
| + // this way, even if a huge amount of address space has been mmapped, only |
| + // the cells that are actually used will be backed by physical memory. |
| + Cell* const cells_; |
| + |
| + // The array of indices into |cells_|. |indices_[Hash(key)]| will contain the |
| + // index of the head of the linked list for |Hash(key)|. A value of 0 |
| + // indicates an empty list. This array is backed by mmapped memory. |
| + uint32_t* const indices_; |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
This should really be named "buckets"
Ruud van Asseldonk
2015/10/06 09:26:44
I agree. Done.
|
| + |
| + // The head of the free list. This is the index of the cell. A value of 0 |
| + // means that the free list is empty. |
| + uint32_t free_; |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
free_list_
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
| + |
| + // The index of the first cell that has not been used before. If the free |
| + // list is empty and a new cell is needed, the cell at this index is used. |
| + uint32_t fresh_; |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
next_unused_cell_
Also say in the comment "from th
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
| + |
| + uint32_t Hash(uintptr_t key) { |
| + // The multiplicative hashing scheme from [Knuth 1998]. The value of |a| |
| + // has been chosen carefully based on measurements with real-world data |
| + // (addresses recorded from a Chrome trace run). It is the first prime |
| + // after 2^17. For |shift|, 13, 14 and 15 yield good results. These values |
| + // are tuned to the table size of 2^18. Microbenchmarks show that this |
| + // simple scheme outperforms fancy hashes like Murmur3 by 20 to 40 percent. |
| + const uintptr_t a = 131101; |
| + const uintptr_t shift = 14; |
| + const uintptr_t h = (key * a) >> shift; |
| + return static_cast<uint32_t>(h) & table_size_mask_; |
| + } |
| + |
| + // Returns a pointer to the variable that contains or should contain the |
| + // index of the cell that stores the entry for |key|. The pointer may point |
| + // at an element of |indices_| or at the |next| member of an element of |
| + // |cells_|. If the value pointed at is 0, |key| is not in the table. |
| + uint32_t* Lookup(uintptr_t key) { |
| + // The list head is in |indices_| at the hash offset. |
| + uint32_t* idx_ptr = indices_ + Hash(key); |
| + |
| + // Chase down the list until the cell that holds |key| is found, or until |
| + // the list ends. |
| + while (*idx_ptr != 0 && cells_[*idx_ptr].key != key) |
| + idx_ptr = &cells_[*idx_ptr].next; |
| + |
| + return idx_ptr; |
| + } |
| + |
| + // Returns the index of a cell that is not being used to store a value. |
| + uint32_t GetFreeCell() { |
| + // First try to re-use a cell from the freelist. |
| + if (free_) { |
| + uint32_t idx = free_; |
| + free_ = cells_[idx].next; |
| + return idx; |
| + } |
| + |
| + // Otherwise pick the next fresh cell. |
| + uint32_t idx = fresh_; |
| + fresh_++; |
| + |
| + // If the hash table has too little capacity (when too little address space |
| + // was reserved for |cells_|), |fresh_| can be an index outside of the |
| + // allocated storage. A guard page is allocated there to crash the program |
| + // in that case. There are alternative solutions: |
| + // - Deal with it, increase capacity by reallocating |cells_|. |
| + // - Refuse to insert and let the caller deal with it. |
| + // Because free cells are re-used before accessing fresh cells with a |
| + // higher index, and because reserving address space without touching it is |
| + // cheap, the simplest solution is to just allocate a humongous chunk of |
| + // address space. |
| + |
| + return idx; |
| + } |
| + |
| + public: |
| + AllocationlessHashTable() |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
Why this is inline? Move to the .cc
same for the d
Ruud van Asseldonk
2015/10/06 09:26:44
Because the class is templated.
|
| + // Reserve enough address space to store |capacity_| entries if |
| + // necessary, with a guard page after it to crash the program when |
| + // attempting to store more entries. |
| + : cells_(static_cast<Cell*>( |
| + AllocateVirtualMemory(capacity_ * sizeof(Cell), |
| + VirtualMemoryGuard::kGuardPageAfter))), |
| + |
| + // Allocate memory for the |indices_| array. There is no need for a |
| + // guard page because all indexing is masked with the table size. With |
| + // a table size of 2^18, the array will fill 256 pages of 4 KiB. |
| + indices_(static_cast<uint32_t*>( |
| + AllocateVirtualMemory(table_size_ * sizeof(uint32_t), |
| + VirtualMemoryGuard::kNone))) { |
| + // The free list is empty. The first fresh cell is cell 1, because index 0 |
| + // is used as list terminator. |
| + free_ = 0; |
| + fresh_ = 1; |
| + } |
| + |
| + ~AllocationlessHashTable() { |
| + FreeVirtualMemory(indices_, table_size_ * sizeof(uint32_t), |
| + VirtualMemoryGuard::kNone); |
| + FreeVirtualMemory(cells_, capacity_ * sizeof(Cell), |
| + VirtualMemoryGuard::kGuardPageAfter); |
| + } |
| + |
| + // Inserts the key into the table and returns a pointer to the associated |
| + // value. If the key was present already, a pointer to the current value is |
| + // returned. |key| must not be 0. (This is because 0 is used to mark free |
| + // cells, to allow efficient iteration of the hash table.) The value might |
| + // contain garbage from previous use. |
| + Value* Insert(uintptr_t key) { |
| + DCHECK(key != 0); |
| + |
| + uint32_t* idx_ptr = Lookup(key); |
| + |
| + // If the index is 0, the key is not yet present, so insert it. |
| + if (*idx_ptr == 0) { |
| + *idx_ptr = GetFreeCell(); |
| + |
| + cells_[*idx_ptr].key = key; |
| + cells_[*idx_ptr].next = 0; |
| + } |
| + |
| + return &cells_[*idx_ptr].value; |
| + } |
| + |
| + // Removes the key from the table if it is present. It is OK to call this |
| + // with 0 as the key. |
| + void Remove(uintptr_t key) { |
| + uint32_t* idx_ptr = Lookup(key); |
| + |
| + // If the index is 0, the key was not there in the first place. |
| + if (*idx_ptr == 0) |
| + return; |
| + |
| + // The cell at the index is now free, remove it from the linked list for |
| + // |Hash(key)|. |
| + uint32_t free_idx = *idx_ptr; |
| + *idx_ptr = cells_[free_idx].next; |
| + |
| + // Put the free cell at the front of the free list. |
| + cells_[free_idx].next = free_; |
| + free_ = free_idx; |
| + |
| + // Reset the key, so that on iteration the free cell is ignored. |
| + cells_[free_idx].key = 0; |
| + } |
| + |
| + // An iterator that iterates entries in the hash table efficiently, but in no |
| + // particular order. It can do this by iterating the cells and ignoring the |
| + // linked lists altogether. Instead of checking whether a cell is in the free |
| + // list to see if it should be skipped, a key value of 0 is used to indicate |
| + // that a cell is free. |
| + class ConstIterator { |
| + friend class AllocationlessHashTable<Value>; |
| + |
| + private: |
| + const AllocationlessHashTable<Value>& table_; |
| + uint32_t index_; |
| + |
| + ConstIterator(const AllocationlessHashTable<Value>& table, uint32_t index) |
| + : table_(table), index_(index) {} |
| + |
| + public: |
| + void operator++() { |
| + // Find the next cell with a nonzero key until all cells that could |
| + // possibly be used have been iterated. Key 0 indicates a free cell. |
| + do |
| + index_++; |
| + while (index_ < table_.fresh_ && table_.cells_[index_].key == 0); |
| + } |
| + |
| + bool operator!=(const ConstIterator& other) const { |
| + return index_ != other.index_; |
| + } |
| + |
| + std::pair<uintptr_t, const Value*> operator*() const { |
| + return std::make_pair(table_.cells_[index_].key, |
| + &table_.cells_[index_].value); |
| + } |
| + }; |
| + |
| + ConstIterator begin() const { |
| + // Initialize the iterator's index to 0. Cell 0 never stores an entry. |
| + ConstIterator iterator(*this, 0); |
| + // Incrementing will advance the iterator to the first used cell. |
| + ++iterator; |
| + return iterator; |
| + } |
| + |
| + ConstIterator end() const { |
| + // Cell |fresh_ - 1| is the last cell that could contain an entry, so |
| + // index |fresh_| is an iterator past the last element, in line with the |
| + // STL iterator conventions. |
| + return ConstIterator(*this, fresh_); |
| + } |
| +}; |
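To make the unusual Insert contract concrete, a small hedged usage sketch (the value type and the |ptr| variable are made up; note that Insert returns a pointer whose contents may be stale and must be overwritten by the caller):

    struct AllocationInfo {
      size_t size;
    };

    AllocationlessHashTable<AllocationInfo> table;
    int dummy = 0;
    void* ptr = &dummy;  // Stands in for an address returned by an allocator.

    // Insert returns a pointer to the value slot; always overwrite it, because
    // a recycled cell may still hold data from a previous entry.
    AllocationInfo* info = table.Insert(reinterpret_cast<uintptr_t>(ptr));
    info->size = 32;

    // Iterate live entries in no particular order; free cells (key 0) are
    // skipped automatically.
    for (auto entry : table) {
      // entry.first is the key, entry.second points at the stored value.
    }

    table.Remove(reinterpret_cast<uintptr_t>(ptr));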
| + |
| +// The allocation register keeps track of all allocations that have not been |
| +// freed. It is a thin wrapper around |AllocationlessHashTable|, so it does not |
| +// call |malloc|. |
| +class BASE_EXPORT AllocationRegister { |
| + friend class AllocationRegisterTest; |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:05
move this under private:
Ruud van Asseldonk
2015/10/06 09:26:44
Done.
|
| + |
| + private: |
| + // The value type for the (key, value) pairs stored in the hash table. |
| + struct Allocation { |
| + size_t size; |
| + AllocationContext context; |
| + }; |
| + |
| + AllocationlessHashTable<Allocation> allocations_; |
| + |
| + public: |
|
Primiano Tucci (use gerrit)
2015/10/05 14:11:06
From [1] "Use the specified order of declarations
Ruud van Asseldonk
2015/10/06 09:26:44
Not possible in this case, but I think it can be d
|
| + AllocationRegister(); |
| + ~AllocationRegister(); |
| + |
| + // Adds an allocation to the table of non-freed allocations. |
| + void Insert(void* address, size_t size, AllocationContext context); |
| + |
| + // Removes an allocation from the table of non-freed allocations. |
| + void Remove(void* address); |
| + |
| + // The type that an iterator dereferences to. |
| + struct AllocationRef { |
| + void* address; |
| + size_t size; |
| + const AllocationContext* context; |
| + }; |
| + |
| + class ConstIterator { |
| + friend class AllocationRegister; |
| + |
| + private: |
| + AllocationlessHashTable<Allocation>::ConstIterator iterator_; |
| + ConstIterator(AllocationlessHashTable<Allocation>::ConstIterator iterator) |
| + : iterator_(iterator) {} |
| + |
| + public: |
| + void operator++() { ++iterator_; } |
| + bool operator!=(const ConstIterator& other) const { |
| + return iterator_ != other.iterator_; |
| + } |
| + |
| + AllocationRef operator*() const; |
| + }; |
| + |
| + ConstIterator begin() const { return ConstIterator(allocations_.begin()); } |
| + ConstIterator end() const { return ConstIterator(allocations_.end()); } |
| +}; |
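And at the register level, a hedged sketch of how allocator hooks might use it (the hook and reporting function names are hypothetical; only Insert, Remove and iteration are part of the interface above):

    AllocationRegister* g_register = nullptr;  // Set up once by the profiler.

    void OnAllocation(void* address, size_t size, AllocationContext context) {
      g_register->Insert(address, size, context);
    }

    void OnFree(void* address) {
      g_register->Remove(address);
    }

    void DumpLiveAllocations() {
      // Iterates all non-freed allocations in no particular order.
      for (auto ref : *g_register)
        ReportAllocation(ref.address, ref.size, *ref.context);  // Hypothetical.
    }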
| + |
| +} // namespace trace_event |
| +} // namespace base |
| + |
| +#endif // BASE_TRACE_EVENT_MEMORY_PROFILER_ALLOCATION_REGISTER_H_ |