net/disk_cache/simple/simple_index.cc - Issue 2918893002: evict larger entries first

Unified Diff: net/disk_cache/simple/simple_index.cc

Issue 2918893002: evict larger entries first (Closed)

Patch Set: update experiment param Created 3 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: net/disk_cache/simple/simple_index.cc

diff --git a/net/disk_cache/simple/simple_index.cc b/net/disk_cache/simple/simple_index.cc

index 3513b59856580adf8c157831ad264b0c28dc31ff..663d1ac864c59cc6a5c766be37b6c656e85ed92b 100644

--- a/net/disk_cache/simple/simple_index.cc

+++ b/net/disk_cache/simple/simple_index.cc

@@ -11,6 +11,7 @@

#include "base/bind.h"

#include "base/bind_helpers.h"

+#include "base/bits.h"

#include "base/files/file_enumerator.h"

#include "base/files/file_util.h"

#include "base/logging.h"

@@ -25,6 +26,7 @@

#include "base/trace_event/memory_usage_estimator.h"

#include "net/base/net_errors.h"

#include "net/disk_cache/simple/simple_entry_format.h"

+#include "net/disk_cache/simple/simple_experiment.h"

#include "net/disk_cache/simple/simple_histogram_macros.h"

#include "net/disk_cache/simple/simple_index_delegate.h"

#include "net/disk_cache/simple/simple_index_file.h"

@@ -49,6 +51,13 @@ const uint32_t kEvictionMarginDivisor = 20;

const uint32_t kBytesInKb = 1024;

+// This is added to the size of each entry before using the size

+// to determine which entries to evict first. It's basically an

+// estimate of the filesystem overhead, but it also serves to flatten

+// the curve so that 1-byte entries and 2-byte entries are basically

+// treated the same.

+static const int kEstimatedEntryOverhead = 512;

} // namespace

namespace disk_cache {

@@ -309,31 +318,83 @@ void SimpleIndex::StartEvictionIfNeeded() {

static_cast<base::HistogramBase::Sample>(max_size_ / kBytesInKb));

// Flatten for sorting.

- std::vector<const std::pair<const uint64_t, EntryMetadata>*> entries;

+ typedef std::pair<uint64_t, const EntrySet::value_type*> SortHelper;

+ std::vector<SortHelper> entries;

entries.reserve(entries_set_.size());

+ uint32_t now = (base::Time::Now() - base::Time::UnixEpoch()).InSeconds();

+ bool use_size = base::FeatureList::IsEnabled(kSimpleCacheEvictionWithSize);

jkarlin 2017/07/06 13:58:18 We shouldn't use base::FeatureList::IsEnabled for

hubbe 2017/07/10 18:23:53 Done. Also changed the experiment enabling code to

for (EntrySet::const_iterator i = entries_set_.begin();

i != entries_set_.end(); ++i) {

- entries.push_back(&*i);

+ uint64_t sort_value = now - i->second.RawTimeForSorting();

+ if (use_size) {

+ // Will not overflow since we're multiplying two 32-bit values and storing

+ // them in a 64-bit variable.

+ sort_value *= i->second.GetEntrySize() + kEstimatedEntryOverhead;

+ }

+ // Subtract so we don't need a custom comparator.

+ entries.emplace_back(std::numeric_limits<uint64_t>::max() - sort_value,

+ &*i);

}

- std::sort(entries.begin(), entries.end(),

- [](const std::pair<const uint64_t, EntryMetadata>* a,

- const std::pair<const uint64_t, EntryMetadata>* b) -> bool {

- return a->second.RawTimeForSorting() <

- b->second.RawTimeForSorting();

- });

- // Remove as many entries from the index to get below |low_watermark_|,

- // collecting least recently used hashes into |entry_hashes|.

- std::vector<uint64_t> entry_hashes;

- std::vector<const std::pair<const uint64_t, EntryMetadata>*>::iterator it =

- entries.begin();

+ // introselect algorithm:

+ // Pick a pivot element, partition elements into elements

+ // smaller than the pivot and not smaller than pivot.

+ // Then sum all the sizes from the "smaller" bucket. If it

+ // is still too large, then iterate again, but only consider

+ // elements from the "smaller" bucket. If not, add all elements

+ // in the "smaller" bucket to the evict list.

+ // Then iterate again, to partition the "not smaller" bucket.

+ // If we go over 2 * log2(|entries|) iterations, resort to calling

+ // std::sort() to avoid N-squared worst case scenario.

uint64_t evicted_so_far_size = 0;

- while (evicted_so_far_size < cache_size_ - low_watermark_) {

- DCHECK(it != entries.end());

- entry_hashes.push_back((*it)->first);

- evicted_so_far_size += (*it)->second.GetEntrySize();

- ++it;

+ const uint64_t amount_to_evict = cache_size_ - low_watermark_;

+ std::vector<uint64_t> entry_hashes;

+ int loops = 2 * base::bits::Log2Ceiling(entries.size());

+ auto begin = entries.begin();

+ auto end = entries.end();

+ while (loops > 0 && evicted_so_far_size < amount_to_evict &&

+ end - begin >= 3) {

+ loops--;

+ // Select a pivot.

+ uint64_t tmp[3];

+ tmp[0] = begin->first;

+ tmp[1] = (end - 1)->first;

+ tmp[2] = (begin + (end - begin) / 2)->first;

+ std::sort(tmp, tmp + 3);

+ uint64_t pivot = tmp[1];

+ // Partition

+ auto middle = std::partition(begin, end, [pivot](const SortHelper& entry) {

+ return entry.first < pivot;

+ });

+ if (middle == end || middle == begin) {

+ // No progress will be made.

+ break;

+ }

+ uint64_t lt_sum = 0;

+ for (auto i = begin; i != middle; ++i)

+ lt_sum += i->second->second.GetEntrySize();

+ if (evicted_so_far_size + lt_sum > amount_to_evict) {

+ end = middle;

+ } else {

+ evicted_so_far_size += lt_sum;

+ for (auto i = begin; i != middle; ++i)

+ entry_hashes.push_back(i->second->first);

+ begin = middle;

+ }

+ if (end > begin && evicted_so_far_size < amount_to_evict) {

+ std::sort(begin, end);

+ for (auto i = begin; i < end; ++i) {

+ if (evicted_so_far_size >= amount_to_evict)

+ break;

+ evicted_so_far_size += i->second->second.GetEntrySize();

+ entry_hashes.push_back(i->second->first);

+ }

}

SIMPLE_CACHE_UMA(COUNTS_1M,

« no previous file with comments | « net/disk_cache/simple/simple_experiment.cc ('k') | no next file » | no next file with comments »