base/debug/activity_tracker.h - Issue 2255503002: New cache for the activity tracker.

Unified Diff: base/debug/activity_tracker.h

Issue 2255503002: New cache for the activity tracker. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: fix broken pop operation Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/debug/activity_tracker.h

diff --git a/base/debug/activity_tracker.h b/base/debug/activity_tracker.h

index d6ff724cf2daee6106c3fbce75b9622f1d7a9eda..d7c8f6d3aaa3a852416cf8b129e744a69942d5cb 100644

--- a/base/debug/activity_tracker.h

+++ b/base/debug/activity_tracker.h

@@ -23,6 +23,7 @@

#include "base/base_export.h"

#include "base/location.h"

#include "base/metrics/persistent_memory_allocator.h"

+#include "base/threading/platform_thread.h"

#include "base/threading/thread_checker.h"

#include "base/threading/thread_local_storage.h"

@@ -41,6 +42,179 @@ namespace debug {

class ThreadActivityTracker;

+//=============================================================================

+// This class provides a lock-free queue of any atomic type with the

+// limitation that there must be at least one "invalid" value. This is

+// built as a completely generic type and can (hopefully) be moved to a

+// more generally useful place in the future.

+template <typename T>

+class LockFreeSimpleQueue {

+ public:

+ // Construct a simple lock-free queue with the specified |size| but

+ // not alowed to hold the |invalid_value|.

+ LockFreeSimpleQueue(size_t size, T invalid_value)

+ : size_(size + 1), invalid_value_(invalid_value), head_(0), tail_(0) {

+ DCHECK_LE(1U, size);

+ // Allocate memory for the queue value. Its size is the requested size +1

+ // because it can never be full (head == tail is reserved to mean "empty").

+ values_.reset(new std::atomic<T>[size_]);

+ // Ensure that the underlying atomics are also lock-free. This should

+ // evaluate to a constant at compile time and so produce no code, but

+ // a static_assert will not compile.

+ CHECK(head_.is_lock_free());

+ CHECK(values_[0].is_lock_free());

+ // All elements must be "invalid" to start in order for the push/pop

+ // operations to work.

+ for (size_t i = 0; i < size_; ++i)

+ values_[i].store(invalid_value_, std::memory_order_relaxed);

+ }

+ T invalid_value() { return invalid_value_; }

+ size_t size() { return size_ - 1; }

+ size_t used() {

+ return (head_.load(std::memory_order_relaxed) + size_ -

+ tail_.load(std::memory_order_relaxed)) %

+ size_;

+ }

+ bool empty() {

+ return empty(head_.load(std::memory_order_relaxed),

+ tail_.load(std::memory_order_relaxed));

+ }

+ bool full() {

+ return full(head_.load(std::memory_order_relaxed),

+ tail_.load(std::memory_order_relaxed));

+ }

+ // Adds a new |value| to the end of the queue and returns true on success

+ // or false if the stack was full.

+ bool push(T value);

+ // Retrieves the first value off the queue and returns it or the "invalid"

+ // value if the stack is empty.

+ T pop();

+ private:

+ // Reports if the stack is empty/full based on explicit head/tail values.

+ // Note that the % operaton in C/C++ is a "remainder" operator and thus will

+ // not provide correct "modular arithmetic" if the left value is negative;

+ // adding |size_| keeps it positive.

+ bool empty(size_t head, size_t tail) { return head == tail; }

+ bool full(size_t head, size_t tail) {

+ return (tail + size_ - 1) % size_ == head;

+ }

+ const size_t size_; // Size of the internal |values_|: requested + 1

+ const T invalid_value_; // A value not allowed to be stored.

+ std::atomic<size_t> head_; // One past the newest value; where to push.

+ std::atomic<size_t> tail_; // The oldest value; first to pop.

+ // Array holding pushed values.

+ std::unique_ptr<std::atomic<T>[]> values_;

+ DISALLOW_COPY_AND_ASSIGN(LockFreeSimpleQueue);

+};

+template <typename T>

+bool LockFreeSimpleQueue<T>::push(T value) {

+ // Pushing the "invalid" value is not allowed; it would cause an infinite

+ // loop in pop.

+ CHECK_NE(invalid_value_, value);

+ // Get the head of the stack and acquire its contents.

+ size_t head = head_.load(std::memory_order_acquire);

+ // In short: allocate a slot at the head of the queue, write the value to

+ // it, try again if anything gets in the way.

+ while (true) {

+ DCHECK_LE(0U, head);

+ DCHECK_GT(size_, head);

+ // If the stack is full, fail.

+ if (full(head, tail_.load(std::memory_order_relaxed)))

+ return false;

+ // The "head" is the critical resource so allocate a slot from the

+ // |values_| buffer at its current location, acquiring the value there.

+ // A "weak" operation is used because it's relatively trivial to try

+ // this operation again.

+ size_t slot = head;

+ if (!head_.compare_exchange_weak(slot, (head + 1) % size_,

+ std::memory_order_acquire,

+ std::memory_order_relaxed)) {

+ // The exchange will have loaded the latest "head" into |slot|.

+ head = slot;

+ continue;

+ }

+ // Save the value being pushed to the reserved slot, overwriting the

+ // "invalid" value that should be there. If it's not, it's because the

+ // slot was released by a pop() but that method hasn't yet extracted

+ // the value. Wait for it to do so. Use a "strong" exchange to avoid

+ // mistakenly releasing the CPU.

+ T expected_value = invalid_value_;

+ while (!values_[slot].compare_exchange_strong(expected_value, value,

+ std::memory_order_relaxed,

+ std::memory_order_relaxed)) {

+ PlatformThread::YieldCurrentThread();

+ expected_value = invalid_value_;

+ }

+ // Success!

+ return true;

+ }

+template <typename T>

+T LockFreeSimpleQueue<T>::pop() {

+ // Get the current number of elements on the stack.

+ size_t tail = tail_.load(std::memory_order_acquire);

+ // In short: deallocate the slot at the tail of the queue, read the value

manzagop (departed) 2016/08/23 19:46:08 I think it's possible that slot wasn't written to

bcwhite 2016/08/23 20:16:26 Either push could get written first but they'll ge

manzagop (departed) 2016/08/23 21:20:28 Sorry, I wasn't super clear. I mean that if there

bcwhite 2016/08/24 11:56:28 Let's see... 1) T1 push allocates slot X but does

manzagop (departed) 2016/08/24 13:17:52 That's my understanding as well. In the current us

That's my understanding as well. In the current use case it doesn't matter if the order isn't preserved. So we could go with a disclaimer comment.

bcwhite 2016/08/24 13:41:21 I think it goes without saying that parallel opera

On 2016/08/24 13:17:52, manzagop wrote: > On 2016/08/24 11:56:28, bcwhite wrote: > > On 2016/08/23 21:20:28, manzagop wrote: > > > On 2016/08/23 20:16:26, bcwhite wrote: > > > > On 2016/08/23 19:46:08, manzagop wrote: > > > > > I think it's possible that slot wasn't written to yet (a push is in > > flight). > > > > By > > > > > deallocating, we're making it possible for another push to happen > > (requires > > > 2 > > > > > pops), in which case either one of the push values could get written > > first? > > > > > > > > Either push could get written first but they'll get written to numbered > > slots > > > > based on the order in which they succeed in the increment-of-head > exchange. > > > > Waiting for the value to be stored is on line 207. > > > > > > > > Pop will wait for the lowest-numbered one to be written and return that, > > > though > > > > parallel pops could return in any order. > > > > > > Sorry, I wasn't super clear. I mean that if there are lots of pushes that > > > complete while that first push is in flight (granted that's unlikely) then > you > > > could wrap around and start performing a second push at the same location, > > while > > > the first is still in flight. Either of those might complete first. Again, > > it's > > > unlikely but possible? > > > > Let's see... > > > > 1) T1 push allocates slot X but doesn't write > > 2) other threads successfully push N-1 values (more will fail) > > 3) T2 pop deallocates slot X but can't read so yields > > 4) other threads successfully pop some values; queue no longer full > > 5) T3 push allocates contended slot X... > > > > Going forward, either T1 or T3 (but not both) can complete the write of slot X > > and T2 will read it. As soon as it's read, though, the other can then > complete > > its write at any time (we're back at step #1) and be available for the next > pop > > operation. > > > > Ordering would go to hell and we had two threads go into an extended stall > (for > > due to the scheduler and one waiting for it)... but I don't think anything > gets > > lost or otherwise breaks. > > That's my understanding as well. In the current use case it doesn't matter if > the order isn't preserved. So we could go with a disclaimer comment.

I think it goes without saying that parallel operations don't have a guaranteed order in any case.

+ // from it, try again if anything goes wrong.

+ while (true) {

+ DCHECK_LE(0U, tail);

+ DCHECK_GT(size_, tail);

+ // If the stack is empty, fail.

+ if (empty(head_.load(std::memory_order_relaxed), tail))

+ return invalid_value_;

+ // The "tail" is the critical resource so retrieve a slot from the

+ // |values_| buffer at its current location, acquiring the value there.

+ // A "weak" operation is used because it's relatively trivial to try

+ // this operation again.

+ size_t slot = tail;

+ if (!tail_.compare_exchange_weak(slot, (tail + 1) % size_,

+ std::memory_order_acquire,

+ std::memory_order_relaxed)) {

+ // The exchange will have loaded the latest "tail" into |slot|.

+ tail = slot;

+ continue;

+ }

+ // Read a value from the bottom of the queue, writing the "invalid" value

+ // in its place. If the retrieved value is invalid then the slot was

+ // acquired by push() but that method hasn't yet written the value. Wait

+ // for it to do so.

+ T value;

+ while ((value = values_[slot].exchange(

+ invalid_value_, std::memory_order_relaxed)) == invalid_value_) {

+ PlatformThread::YieldCurrentThread();

+ }

+ // Success!

+ DCHECK_NE(invalid_value_, value);

+ return value;

+ }

+//=============================================================================

enum : int {

// The maximum number of call-stack addresses stored per activity. This

// cannot be changed without also changing the version number of the

@@ -508,9 +682,7 @@ class BASE_EXPORT GlobalActivityTracker {

// These have to be lock-free because lock activity is tracked and causes

// re-entry problems.

std::atomic<int> thread_tracker_count_;

- std::atomic<int> available_memories_count_;

- std::atomic<PersistentMemoryAllocator::Reference>

- available_memories_[kMaxThreadCount];

+ LockFreeSimpleQueue<PersistentMemoryAllocator::Reference> available_memories_;

// The active global activity tracker.

static GlobalActivityTracker* g_tracker_;

« no previous file with comments | « no previous file | base/debug/activity_tracker.cc » ('j') | no next file with comments »