base/metrics/sample_vector.cc - Issue 2811713003: Embed a single sample in histogram metadata.

Unified Diff: base/metrics/sample_vector.cc

Issue 2811713003: Embed a single sample in histogram metadata. (Closed)

Patch Set: addressed review comments by asvitkine Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: base/metrics/sample_vector.cc

diff --git a/base/metrics/sample_vector.cc b/base/metrics/sample_vector.cc

index 477b8aff8c70386848d197ac04b3c87cd1efe942..f55fb947751a85f8358dcec15ba787bd5b23649a 100644

--- a/base/metrics/sample_vector.cc

+++ b/base/metrics/sample_vector.cc

@@ -4,103 +4,204 @@

#include "base/metrics/sample_vector.h"

+#include "base/lazy_instance.h"

#include "base/logging.h"

+#include "base/memory/ptr_util.h"

#include "base/metrics/bucket_ranges.h"

+#include "base/metrics/persistent_memory_allocator.h"

+#include "base/synchronization/lock.h"

+#include "base/threading/platform_thread.h"

+// This SampleVector makes use of the single-sample embedded in the base

+// HistogramSamples class. If the count is non-zero then there is guaranteed

+// (within the bounds of "eventual consistency") to be no allocated external

+// storage. Once the full counts storage is allocated, the single-sample must

+// be extracted and disabled.

namespace base {

typedef HistogramBase::Count Count;

typedef HistogramBase::Sample Sample;

-SampleVector::SampleVector(const BucketRanges* bucket_ranges)

- : SampleVector(0, bucket_ranges) {}

-SampleVector::SampleVector(uint64_t id, const BucketRanges* bucket_ranges)

+SampleVectorBase::SampleVectorBase(uint64_t id,

+ const BucketRanges* bucket_ranges)

: HistogramSamples(id),

- local_counts_(bucket_ranges->bucket_count()),

- counts_(&local_counts_[0]),

- counts_size_(local_counts_.size()),

+ counts_size_(bucket_ranges->bucket_count()),

bucket_ranges_(bucket_ranges) {

CHECK_GE(bucket_ranges_->bucket_count(), 1u);

}

-SampleVector::SampleVector(uint64_t id,

- HistogramBase::AtomicCount* counts,

- size_t counts_size,

- Metadata* meta,

- const BucketRanges* bucket_ranges)

+SampleVectorBase::SampleVectorBase(uint64_t id,

+ Metadata* meta,

+ const BucketRanges* bucket_ranges)

: HistogramSamples(id, meta),

- counts_(counts),

counts_size_(bucket_ranges->bucket_count()),

bucket_ranges_(bucket_ranges) {

- CHECK_LE(bucket_ranges_->bucket_count(), counts_size_);

CHECK_GE(bucket_ranges_->bucket_count(), 1u);

}

-SampleVector::~SampleVector() {}

+SampleVectorBase::~SampleVectorBase() {}

+void SampleVectorBase::Accumulate(Sample value, Count count) {

+ // Handle the single-sample case.

+ if (!counts()) {

Alexei Svitkine (slow) 2017/04/18 20:52:20 Nit: The multi-sample case is less code - so rever

bcwhite 2017/04/19 18:13:02 This case falls through to the multi-sample case i

+ // Try to accumulate the parameters into the single-count entry.

+ if (AccumulateSingleSample(value, count)) {

+ // A race condition could lead to a new single-sample being accumulated

+ // above just after another thread executed the MountCountsStorage()

+ // below. Since it is mounted, it could be mounted elsewhere and have

+ // values written to it. It's not allowed to have both a single-sample

+ // and entries in the counts array so move the single-sample.

+ if (counts())

+ MoveSingleSampleToCounts();

-void SampleVector::Accumulate(Sample value, Count count) {

+ return;

+ }

+ // Need real storage to store both what was in the single-sample plus the

+ // parameter information.

+ MountCountsStorage();

Alexei Svitkine (slow) 2017/04/18 20:52:21 Can the name of this function be changed so it's c

bcwhite 2017/04/19 18:13:02 Sure.

+ }

+ // Handle the multi-sample case.

size_t bucket_index = GetBucketIndex(value);

- subtle::NoBarrier_AtomicIncrement(&counts_[bucket_index], count);

- IncreaseSum(static_cast<int64_t>(count) * value);

- IncreaseRedundantCount(count);

+ subtle::NoBarrier_AtomicIncrement(&counts()[bucket_index], count);

+ IncreaseSumAndCount(static_cast<int64_t>(count) * value, count);

}

-Count SampleVector::GetCount(Sample value) const {

- size_t bucket_index = GetBucketIndex(value);

- return subtle::NoBarrier_Load(&counts_[bucket_index]);

+Count SampleVectorBase::GetCount(Sample value) const {

+ return GetCountAtIndex(GetBucketIndex(value));

}

-Count SampleVector::TotalCount() const {

- Count count = 0;

- for (size_t i = 0; i < counts_size_; i++) {

- count += subtle::NoBarrier_Load(&counts_[i]);

+Count SampleVectorBase::TotalCount() const {

+ // Handle the single-sample case.

+ SingleSample sample = single_sample().Load();

+ if (sample.count)

+ return sample.count;

+ // Handle the multi-sample case.

+ if (counts() || MountExistingCountsStorage()) {

+ Count count = 0;

+ const HistogramBase::AtomicCount* counts_array = counts();

+ for (size_t i = 0; i < counts_size_; i++) {

+ count += subtle::NoBarrier_Load(&counts_array[i]);

+ }

+ return count;

}

- return count;

+ // And the no-value case.

+ return 0;

}

-Count SampleVector::GetCountAtIndex(size_t bucket_index) const {

+Count SampleVectorBase::GetCountAtIndex(size_t bucket_index) const {

DCHECK(bucket_index < counts_size_);

- return subtle::NoBarrier_Load(&counts_[bucket_index]);

+ // Handle the single-sample case.

+ SingleSample sample = single_sample().Load();

+ if (sample.count != 0) {

+ if (sample.value >= bucket_ranges_->range(bucket_index) &&

+ sample.value < bucket_ranges_->range(bucket_index + 1)) {

+ return sample.count;

+ }

+ return 0;

+ }

+ // Handle the multi-sample case.

+ if (counts() || MountExistingCountsStorage())

+ return subtle::NoBarrier_Load(&counts()[bucket_index]);

+ // And the no-value case.

+ return 0;

}

-std::unique_ptr<SampleCountIterator> SampleVector::Iterator() const {

- return std::unique_ptr<SampleCountIterator>(

- new SampleVectorIterator(counts_, counts_size_, bucket_ranges_));

+std::unique_ptr<SampleCountIterator> SampleVectorBase::Iterator() const {

+ // Handle the single-sample case.

+ SingleSample sample = single_sample().Load();

+ if (sample.count != 0) {

+ size_t index = GetBucketIndex(sample.value);

+ return MakeUnique<SingleSampleIterator>(bucket_ranges_->range(index),

+ bucket_ranges_->range(index + 1),

+ sample.count, index);

+ }

+ // Handle the multi-sample case.

+ if (counts() || MountExistingCountsStorage()) {

+ return MakeUnique<SampleVectorIterator>(counts(), counts_size_,

+ bucket_ranges_);

+ }

+ // And the no-value case.

+ return MakeUnique<SampleVectorIterator>(nullptr, 0, bucket_ranges_);

}

-bool SampleVector::AddSubtractImpl(SampleCountIterator* iter,

- HistogramSamples::Operator op) {

+bool SampleVectorBase::AddSubtractImpl(SampleCountIterator* iter,

+ HistogramSamples::Operator op) {

HistogramBase::Sample min;

HistogramBase::Sample max;

HistogramBase::Count count;

+ // Stop now if there's nothing to do.

+ if (iter->Done())

+ return true;

+ // Get the first value. Single-value storage is possible if there is no

+ // counts storage and the retrieved entry is the only one in the iterator.

+ iter->Get(&min, &max, &count);

+ iter->Next();

+ if (!counts()) {

+ if (iter->Done()) {

+ // Don't call AccumulateSingleSample because that udpates sum and count

Alexei Svitkine (slow) 2017/04/18 20:52:20 updates

bcwhite 2017/04/19 18:13:02 Done.

+ // which was already done by the caller of this method.

+ if (single_sample().Accumulate(

+ min, op == HistogramSamples::ADD ? count : -count)) {

+ if (!counts())

+ return true;

+ // Handle race-condition creating counts during the accumulate.

+ MoveSingleSampleToCounts();

+ // This will fall through to MountCountsStorage which is sub-optimal

+ // (since we just detected that such has already been done) but it's

+ // not worth optimizing because of the extreme rarity of this execution

+ // path.

+ }

+ // Definitely need the counts storage for what is to follow.

+ MountCountsStorage();

+ }

// Go through the iterator and add the counts into correct bucket.

size_t index = 0;

- while (index < counts_size_ && !iter->Done()) {

- iter->Get(&min, &max, &count);

- if (min == bucket_ranges_->range(index) &&

- max == bucket_ranges_->range(index + 1)) {

- // Sample matches this bucket!

- subtle::NoBarrier_AtomicIncrement(

- &counts_[index], op == HistogramSamples::ADD ? count : -count);

- iter->Next();

- } else if (min > bucket_ranges_->range(index)) {

- // Sample is larger than current bucket range. Try next.

- index++;

- } else {

- // Sample is smaller than current bucket range. We scan buckets from

- // smallest to largest, so the sample value must be invalid.

- return false;

+ while (true) {

+ while (index < counts_size_) {

Alexei Svitkine (slow) 2017/04/18 20:52:20 Nested loops make this hard to follow. I suggest

bcwhite 2017/04/19 18:13:02 Could do though there is state (index) that is pre

+ if (min == bucket_ranges_->range(index) &&

+ max == bucket_ranges_->range(index + 1)) {

+ subtle::NoBarrier_AtomicIncrement(

+ &counts()[index], op == HistogramSamples::ADD ? count : -count);

+ break;

+ } else if (min > bucket_ranges_->range(index)) {

+ // Sample is larger than current bucket range. Try next.

+ index++;

+ } else {

+ // Sample is smaller than current bucket range. We scan buckets from

+ // smallest to largest, so the sample value must be invalid.

+ return false;

+ }

}

- }

+ if (index == counts_size_)

Alexei Svitkine (slow) 2017/04/18 20:52:20 Add a comment about this case.

bcwhite 2017/04/19 18:13:02 Done.

+ return false;

- return iter->Done();

+ if (iter->Done())

+ return true;

+ iter->Get(&min, &max, &count);

+ iter->Next();

+ }

}

// Use simple binary search. This is very general, but there are better

// approaches if we knew that the buckets were linearly distributed.

-size_t SampleVector::GetBucketIndex(Sample value) const {

+size_t SampleVectorBase::GetBucketIndex(Sample value) const {

size_t bucket_count = bucket_ranges_->bucket_count();

CHECK_GE(bucket_count, 1u);

CHECK_GE(value, bucket_ranges_->range(0));

@@ -125,6 +226,110 @@ size_t SampleVector::GetBucketIndex(Sample value) const {

return mid;

}

+void SampleVectorBase::MoveSingleSampleToCounts() {

+ DCHECK(counts());

+ // Disable the single-sample since there is now counts storage for the data.

+ SingleSample sample = single_sample().Extract(/*disable=*/true);

+ // Stop here if there is no "count" as trying to find the bucket index of

+ // a invalid (including zero) "value" will crash.

Alexei Svitkine (slow) 2017/04/18 20:52:20 Nit: an invalid

bcwhite 2017/04/19 18:13:02 Done.

+ if (sample.count == 0)

+ return;

+ // Move the value into storage. Sum and redundant-count already account

+ // for this entry so no need to call IncreaseSumAndCount().

+ size_t bucket_index = GetBucketIndex(sample.value);

+ subtle::NoBarrier_AtomicIncrement(&counts()[bucket_index], sample.count);

+void SampleVectorBase::MountCountsStorage() {

+ // There are many of SampleVector objects and the lock is needed very

Alexei Svitkine (slow) 2017/04/18 20:52:21 Nit: remove "of"

bcwhite 2017/04/19 18:13:02 Done.

+ // infrequently (just when advancing from single-sample to multi-sample) so

+ // define a single, global lock that all can use. This lock only prevents

+ // concurrent entry into the code below; access and updates to |counts_|

+ // still requires atomic operations.

+ static LazyInstance<Lock>::Leaky counts_lock = LAZY_INSTANCE_INITIALIZER;

+ {

+ AutoLock lock(counts_lock.Get());

+ if (subtle::NoBarrier_Load(&counts_) == 0) {

+ // Create the actual counts storage while the above lock is acquired.

+ HistogramBase::Count* counts = CreateCountsStorageWhileLocked();

+ DCHECK(counts);

+ // Point |counts_| to the newly created storage. This is done while

+ // locked to prevent possible concurrent calls to CreateCountsStorage

+ // but, between that call and here, other threads could notice the

+ // existance of the storage and race with this to set_counts(). That's

+ // okay because (a) it's atomic and (b) it always writes the same value.

+ set_counts(counts);

+ }

+ // Move any single-sample into the newly mounted storage.

+ MoveSingleSampleToCounts();

+SampleVector::SampleVector(const BucketRanges* bucket_ranges)

+ : SampleVector(0, bucket_ranges) {}

+SampleVector::SampleVector(uint64_t id, const BucketRanges* bucket_ranges)

+ : SampleVectorBase(id, bucket_ranges) {}

+SampleVector::~SampleVector() {}

+bool SampleVector::MountExistingCountsStorage() const {

+ // There is never any existing storage other than what is already in use.

+ return counts() != nullptr;

+HistogramBase::AtomicCount* SampleVector::CreateCountsStorageWhileLocked() {

+ local_counts_.resize(counts_size());

+ return &local_counts_[0];

+PersistentSampleVector::PersistentSampleVector(

+ uint64_t id,

+ const BucketRanges* bucket_ranges,

+ Metadata* meta,

+ const DelayedPersistentAllocation& counts)

+ : SampleVectorBase(id, meta, bucket_ranges), persistent_counts_(counts) {

+ // If the "counts" array has already been created, mount it now.

+ if (persistent_counts_.reference())

Alexei Svitkine (slow) 2017/04/18 20:52:20 Nit: {} Should this just call MountExistingCounts

bcwhite 2017/04/19 18:13:02 Done.

+ set_counts(

+ static_cast<HistogramBase::AtomicCount*>(persistent_counts_.Get()));

+PersistentSampleVector::~PersistentSampleVector() {}

+bool PersistentSampleVector::MountExistingCountsStorage() const {

+ // There is no check that counts is not yet mounted because, given that this

+ // is a virtual function, it's more efficient to do that at the call-site.

+ // There is no danger, however, because at worst the counts value would be

+ // overwritten (in an atomic manner) with the exact same address.

+ if (!persistent_counts_.reference())

+ return false; // Nothing to mount.

+ // Mount the counts array in position.

+ set_counts(

+ static_cast<HistogramBase::AtomicCount*>(persistent_counts_.Get()));

+ return true;

+HistogramBase::AtomicCount*

+PersistentSampleVector::CreateCountsStorageWhileLocked() {

+ void* mem = persistent_counts_.Get();

+ if (!mem) {

+ // The above shouldn't fail but can if Bad Things(tm) are occurring in the

+ // persistent allocator. Crashing isn't a good option so instead just

+ // allocate something from the heap and return that. There will be no

+ // sharing or persistence but worse things are already happening.

+ return new HistogramBase::AtomicCount[counts_size()];

+ }

+ return static_cast<HistogramBase::AtomicCount*>(mem);

SampleVectorIterator::SampleVectorIterator(

const std::vector<HistogramBase::AtomicCount>* counts,

const BucketRanges* bucket_ranges)

« base/metrics/sample_vector.h ('K') | « base/metrics/sample_vector.h ('k') | base/metrics/sample_vector_unittest.cc » ('j') | no next file with comments »