Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(25)

Unified Diff: client/isolateserver.py

Issue 2414543003: isolateserver: DiskCache format v2 (Closed)
Patch Set: docs Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | client/tests/isolateserver_test.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: client/isolateserver.py
diff --git a/client/isolateserver.py b/client/isolateserver.py
index 8a8bed6763c2ba98dbe89e007a77131fd99fedd8..5c7a5cf3ed252f640e486248f5e24a8bcedd123a 100755
--- a/client/isolateserver.py
+++ b/client/isolateserver.py
@@ -1481,27 +1481,35 @@ class CachePolicies(object):
class DiskCache(LocalCache):
- """Stateful LRU cache in a flat hash table in a directory.
+ """Stateful LRU cache in a semi-flat hash table in a directory.
Saves its state as json file.
"""
+ VERSION = 2
+ VERSION_FILE = u'VERSION'
STATE_FILE = u'state.json'
- def __init__(self, cache_dir, policies, hash_algo):
+ # All protected methods (starting with '_') except _path should be called
+ # with self._lock held.
+
+ def __init__(self, cache_dir, policies, hash_algo, time_fn=None):
"""
Arguments:
cache_dir: directory where to place the cache.
policies: cache retention policies.
hash_algo: hashing algorithm used.
+ time_fn: function to take current timestamp when adding new items.
+ Defaults to time.time.
"""
- # All protected methods (starting with '_') except _path should be called
- # with self._lock held.
super(DiskCache, self).__init__()
+ self.time_fn = time_fn or time.time
self.cache_dir = cache_dir
self.policies = policies
self.hash_algo = hash_algo
self.state_file = os.path.join(cache_dir, self.STATE_FILE)
- # Items in a LRU lookup dict(digest: size).
+ self.version_file = os.path.join(cache_dir, self.VERSION_FILE)
M-A Ruel 2016/10/13 02:19:37 I really prefer to keep using a single file, not t
+ # Items in a LRU lookup dict(digest: [size, timestamp]).
+ # We use lists instead of tuples because JSON arrays are parsed to lists.
self._lru = lru.LRUDict()
# Current cached free disk space. It is updated by self._trim().
self._free_disk = 0
@@ -1512,9 +1520,11 @@ class DiskCache(LocalCache):
self._protected = None
# Cleanup operations done by self._load(), if any.
self._operations = []
+
+ self._free_disk = file_path.get_free_space(self.cache_dir)
+
with tools.Profiler('Setup'):
with self._lock:
- # self._load() calls self._trim() which initializes self._free_disk.
self._load()
def __contains__(self, digest):
@@ -1535,7 +1545,7 @@ class DiskCache(LocalCache):
logging.info(
'%5d (%8dkb) current',
len(self._lru),
- sum(self._lru.itervalues()) / 1024)
+ self._cache_disk_size() / 1024)
logging.info(
'%5d (%8dkb) evicted',
len(self._evicted), sum(self._evicted) / 1024)
@@ -1544,6 +1554,12 @@ class DiskCache(LocalCache):
self._free_disk / 1024)
return False
+ def _sizes(self):
+ """Returns an iterator of pairs (digest, size)."""
+ return (
+ (digest, size)
+ for digest, (size, _) in self._lru._items.iteritems())
+
def cached_set(self):
with self._lock:
return self._lru.keys_set()
@@ -1557,36 +1573,43 @@ class DiskCache(LocalCache):
At that point, the cache was already loaded, trimmed to respect cache
policies.
"""
+ def try_remove(filename):
+ assert os.path.isabs(filename)
+ logging.warning('Removing unknown file %s from cache', filename)
+ if fs.isdir(filename):
+ try:
+ file_path.rmtree(filename)
+ except OSError:
+ pass
+ else:
+ file_path.try_remove(filename)
+
fs.chmod(self.cache_dir, 0700)
# Ensure that all files listed in the state still exist and add new ones.
previous = self._lru.keys_set()
# It'd be faster if there were a readdir() function.
- for filename in fs.listdir(self.cache_dir):
- if filename == self.STATE_FILE:
- fs.chmod(os.path.join(self.cache_dir, filename), 0600)
+ for filename_l1 in fs.listdir(self.cache_dir):
+ full_name_l1 = os.path.join(self.cache_dir, filename_l1)
+ if filename_l1 in (self.STATE_FILE, self.VERSION_FILE):
+ fs.chmod(full_name_l1, 0600)
continue
- if filename in previous:
- fs.chmod(os.path.join(self.cache_dir, filename), 0400)
- previous.remove(filename)
+ if len(filename_l1) != 2:
+ try_remove(full_name_l1)
continue
-
- # An untracked file. Delete it.
- logging.warning('Removing unknown file %s from cache', filename)
- p = self._path(filename)
- if fs.isdir(p):
- try:
- file_path.rmtree(p)
- except OSError:
- pass
- else:
- file_path.try_remove(p)
- continue
+ for filename_l2 in fs.listdir(full_name_l1):
+ digest = filename_l1 + filename_l2
+ full_name_l2 = os.path.join(full_name_l1, filename_l2)
+ if digest in previous:
+ fs.chmod(full_name_l2, 0400)
+ previous.remove(digest)
+ else:
+ try_remove(full_name_l2)
if previous:
# Filter out entries that were not found.
logging.warning('Removed %d lost files', len(previous))
- for filename in previous:
- self._lru.pop(filename)
+ for digest in previous:
+ self._lru.pop(digest)
# What remains to be done is to hash every single item to
# detect corruption, then save to ensure state.json is up to date.
@@ -1618,7 +1641,7 @@ class DiskCache(LocalCache):
with self._lock:
if digest not in self._lru:
return False
- self._lru.touch(digest)
M-A Ruel 2016/10/13 02:19:37 Please remove the touch method from LRUDict to be
+ self._lru.add(digest, [size, self.time_fn()])
self._protected = self._protected or digest
return True
@@ -1633,7 +1656,7 @@ class DiskCache(LocalCache):
try:
f = fs.open(self._path(digest), 'rb')
with self._lock:
- self._used.append(self._lru[digest])
+ self._used.append(self._lru[digest][0])
return f
except IOError:
raise CacheMiss(digest)
@@ -1665,6 +1688,10 @@ class DiskCache(LocalCache):
self._add(digest, size)
return digest
+ def _cache_disk_size(self):
+ """Returns number of bytes that cache files take."""
+ return sum(size for (size, _) in self._lru.itervalues())
+
def _load(self):
"""Loads state of the cache from json file.
@@ -1672,9 +1699,24 @@ class DiskCache(LocalCache):
"""
self._lock.assert_locked()
- if not fs.isfile(self.state_file):
- if not os.path.isdir(self.cache_dir):
- fs.makedirs(self.cache_dir)
+ # Read version file.
+ version = None
+ try:
+ with fs.open(self.version_file, 'r') as f:
+ version = f.read()
+ except IOError:
+ pass
+ else:
+ try:
+ version = int(version)
+ except ValueError:
+ logging.error('%s is corrupted: not an integer', self.version_file)
+
+ if version != self.VERSION:
+ # Possibly, cache dir is in the old format.
+ file_path.try_remove(self.cache_dir)
+ self._lru = lru.LRUDict()
+ self._save() # create state.json
else:
# Load state of the cache.
try:
@@ -1683,11 +1725,11 @@ class DiskCache(LocalCache):
logging.error('Failed to load cache state: %s' % (err,))
# Don't want to keep broken state file.
file_path.try_remove(self.state_file)
- self._trim()
+ self._trim()
# We want the initial cache size after trimming, i.e. what is readily
# available.
self._initial_number_items = len(self._lru)
- self._initial_size = sum(self._lru.itervalues())
+ self._initial_size = self._cache_disk_size()
if self._evicted:
logging.info(
'Trimming evicted items with the following sizes: %s',
@@ -1701,6 +1743,11 @@ class DiskCache(LocalCache):
if fs.isdir(d):
# Necessary otherwise the file can't be created.
file_path.set_read_only(d, False)
+
+ if fs.isfile(self.version_file):
+ file_path.set_read_only(self.version_file, False)
+ file_write(self.version_file, [str(self.VERSION)])
+
if fs.isfile(self.state_file):
file_path.set_read_only(self.state_file, False)
self._lru.save(self.state_file)
@@ -1711,7 +1758,7 @@ class DiskCache(LocalCache):
# Ensure maximum cache size.
if self.policies.max_cache_size:
- total_size = sum(self._lru.itervalues())
+ total_size = self._cache_disk_size()
while total_size > self.policies.max_cache_size:
total_size -= self._remove_lru_file(True)
@@ -1731,7 +1778,7 @@ class DiskCache(LocalCache):
self._remove_lru_file(True)
if trimmed_due_to_space:
- total_usage = sum(self._lru.itervalues())
+ total_usage = self._cache_disk_size()
usage_percent = 0.
if total_usage:
usage_percent = 100. * float(total_usage) / self.policies.max_cache_size
@@ -1748,29 +1795,35 @@ class DiskCache(LocalCache):
def _path(self, digest):
"""Returns the path to one item."""
- return os.path.join(self.cache_dir, digest)
+ assert len(digest) > 2
+ return os.path.join(self.cache_dir, digest[:2], digest[2:])
def _remove_lru_file(self, allow_protected):
"""Removes the lastest recently used file and returns its size."""
self._lock.assert_locked()
+
try:
- digest, size = self._lru.get_oldest()
+ digest, _ = self._lru.get_oldest()
if not allow_protected and digest == self._protected:
raise Error('Not enough space to map the whole isolated tree')
except KeyError:
raise Error('Nothing to remove')
- digest, size = self._lru.pop_oldest()
+
+ digest, (size, _) = self._lru.pop_oldest()
logging.debug("Removing LRU file %s", digest)
self._delete_file(digest, size)
return size
def _add(self, digest, size=UNKNOWN_FILE_SIZE):
- """Adds an item into LRU cache marking it as a newest one."""
+ """Adds an item into LRU cache marking it as a newest one.
+
+ Assumes the file exists.
+ """
self._lock.assert_locked()
if size == UNKNOWN_FILE_SIZE:
size = fs.stat(self._path(digest)).st_size
self._added.append(size)
- self._lru.add(digest, size)
+ self._lru.add(digest, [size, self.time_fn()])
self._free_disk -= size
# Do a quicker version of self._trim(). It only enforces free disk space,
# not cache size limits. It doesn't actually look at real free disk space,
@@ -1790,9 +1843,14 @@ class DiskCache(LocalCache):
try:
if size == UNKNOWN_FILE_SIZE:
size = fs.stat(self._path(digest)).st_size
- file_path.try_remove(self._path(digest))
+ path = self._path(digest)
+ file_path.try_remove(path)
self._evicted.append(size)
self._free_disk += size
+
+ dir = os.path.dirname(path)
+ if len(os.listdir(dir)) == 0:
+ fs.rmtree(dir)
except OSError as e:
logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
« no previous file with comments | « no previous file | client/tests/isolateserver_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698