Chromium Code Reviews| Index: client/isolateserver.py |
| diff --git a/client/isolateserver.py b/client/isolateserver.py |
| index 8a8bed6763c2ba98dbe89e007a77131fd99fedd8..5c7a5cf3ed252f640e486248f5e24a8bcedd123a 100755 |
| --- a/client/isolateserver.py |
| +++ b/client/isolateserver.py |
| @@ -1481,27 +1481,35 @@ class CachePolicies(object): |
| class DiskCache(LocalCache): |
| - """Stateful LRU cache in a flat hash table in a directory. |
| + """Stateful LRU cache in a semi-flat hash table in a directory. |
| Saves its state as json file. |
| """ |
| + VERSION = 2 |
| + VERSION_FILE = u'VERSION' |
| STATE_FILE = u'state.json' |
| - def __init__(self, cache_dir, policies, hash_algo): |
| + # All protected methods (starting with '_') except _path should be called |
| + # with self._lock held. |
| + |
| + def __init__(self, cache_dir, policies, hash_algo, time_fn=None): |
| """ |
| Arguments: |
| cache_dir: directory where to place the cache. |
| policies: cache retention policies. |
| hash_algo: hashing algorithm used. |
| + time_fn: function to take current timestamp when adding new items. |
| + Defaults to time.time. |
| """ |
| - # All protected methods (starting with '_') except _path should be called |
| - # with self._lock held. |
| super(DiskCache, self).__init__() |
| + self.time_fn = time_fn or time.time |
| self.cache_dir = cache_dir |
| self.policies = policies |
| self.hash_algo = hash_algo |
| self.state_file = os.path.join(cache_dir, self.STATE_FILE) |
| - # Items in a LRU lookup dict(digest: size). |
| + self.version_file = os.path.join(cache_dir, self.VERSION_FILE) |
|
M-A Ruel
2016/10/13 02:19:37
I really prefer to keep using a single file, not t
|
| + # Items in a LRU lookup dict(digest: [size, timestamp]). |
| + # We use lists instead of tuples because JSON arrays are parsed to lists. |
| self._lru = lru.LRUDict() |
| # Current cached free disk space. It is updated by self._trim(). |
| self._free_disk = 0 |
| @@ -1512,9 +1520,11 @@ class DiskCache(LocalCache): |
| self._protected = None |
| # Cleanup operations done by self._load(), if any. |
| self._operations = [] |
| + |
| + self._free_disk = file_path.get_free_space(self.cache_dir) |
| + |
| with tools.Profiler('Setup'): |
| with self._lock: |
| - # self._load() calls self._trim() which initializes self._free_disk. |
| self._load() |
| def __contains__(self, digest): |
| @@ -1535,7 +1545,7 @@ class DiskCache(LocalCache): |
| logging.info( |
| '%5d (%8dkb) current', |
| len(self._lru), |
| - sum(self._lru.itervalues()) / 1024) |
| + self._cache_disk_size() / 1024) |
| logging.info( |
| '%5d (%8dkb) evicted', |
| len(self._evicted), sum(self._evicted) / 1024) |
| @@ -1544,6 +1554,12 @@ class DiskCache(LocalCache): |
| self._free_disk / 1024) |
| return False |
| + def _sizes(self): |
| + """Returns an iterator of pairs (digest, size).""" |
| + return ( |
| + (digest, size) |
| + for digest, (size, _) in self._lru._items.iteritems()) |
| + |
| def cached_set(self): |
| with self._lock: |
| return self._lru.keys_set() |
| @@ -1557,36 +1573,43 @@ class DiskCache(LocalCache): |
| At that point, the cache was already loaded, trimmed to respect cache |
| policies. |
| """ |
| + def try_remove(filename): |
| + assert os.path.isabs(filename) |
| + logging.warning('Removing unknown file %s from cache', filename) |
| + if fs.isdir(filename): |
| + try: |
| + file_path.rmtree(filename) |
| + except OSError: |
| + pass |
| + else: |
| + file_path.try_remove(filename) |
| + |
| fs.chmod(self.cache_dir, 0700) |
| # Ensure that all files listed in the state still exist and add new ones. |
| previous = self._lru.keys_set() |
| # It'd be faster if there were a readdir() function. |
| - for filename in fs.listdir(self.cache_dir): |
| - if filename == self.STATE_FILE: |
| - fs.chmod(os.path.join(self.cache_dir, filename), 0600) |
| + for filename_l1 in fs.listdir(self.cache_dir): |
| + full_name_l1 = os.path.join(self.cache_dir, filename_l1) |
| + if filename_l1 in (self.STATE_FILE, self.VERSION_FILE): |
| + fs.chmod(full_name_l1, 0600) |
| continue |
| - if filename in previous: |
| - fs.chmod(os.path.join(self.cache_dir, filename), 0400) |
| - previous.remove(filename) |
| + if len(filename_l1) != 2: |
| + try_remove(full_name_l1) |
| continue |
| - |
| - # An untracked file. Delete it. |
| - logging.warning('Removing unknown file %s from cache', filename) |
| - p = self._path(filename) |
| - if fs.isdir(p): |
| - try: |
| - file_path.rmtree(p) |
| - except OSError: |
| - pass |
| - else: |
| - file_path.try_remove(p) |
| - continue |
| + for filename_l2 in fs.listdir(full_name_l1): |
| + digest = filename_l1 + filename_l2 |
| + full_name_l2 = os.path.join(full_name_l1, filename_l2) |
| + if digest in previous: |
| + fs.chmod(full_name_l2, 0400) |
| + previous.remove(digest) |
| + else: |
| + try_remove(full_name_l2) |
| if previous: |
| # Filter out entries that were not found. |
| logging.warning('Removed %d lost files', len(previous)) |
| - for filename in previous: |
| - self._lru.pop(filename) |
| + for digest in previous: |
| + self._lru.pop(digest) |
| # What remains to be done is to hash every single item to |
| # detect corruption, then save to ensure state.json is up to date. |
| @@ -1618,7 +1641,7 @@ class DiskCache(LocalCache): |
| with self._lock: |
| if digest not in self._lru: |
| return False |
| - self._lru.touch(digest) |
|
M-A Ruel
2016/10/13 02:19:37
Please remove the touch method from LRUDict to be
|
| + self._lru.add(digest, [size, self.time_fn()]) |
| self._protected = self._protected or digest |
| return True |
| @@ -1633,7 +1656,7 @@ class DiskCache(LocalCache): |
| try: |
| f = fs.open(self._path(digest), 'rb') |
| with self._lock: |
| - self._used.append(self._lru[digest]) |
| + self._used.append(self._lru[digest][0]) |
| return f |
| except IOError: |
| raise CacheMiss(digest) |
| @@ -1665,6 +1688,10 @@ class DiskCache(LocalCache): |
| self._add(digest, size) |
| return digest |
| + def _cache_disk_size(self): |
| + """Returns number of bytes that cache files take.""" |
| + return sum(size for (size, _) in self._lru.itervalues()) |
| + |
| def _load(self): |
| """Loads state of the cache from json file. |
| @@ -1672,9 +1699,24 @@ class DiskCache(LocalCache): |
| """ |
| self._lock.assert_locked() |
| - if not fs.isfile(self.state_file): |
| - if not os.path.isdir(self.cache_dir): |
| - fs.makedirs(self.cache_dir) |
| + # Read version file. |
| + version = None |
| + try: |
| + with fs.open(self.version_file, 'r') as f: |
| + version = f.read() |
| + except IOError: |
| + pass |
| + else: |
| + try: |
| + version = int(version) |
| + except ValueError: |
| + logging.error('%s is corrupted: not an integer', self.version_file) |
| + |
| + if version != self.VERSION: |
| + # Possibly, cache dir is in the old format. |
| + file_path.try_remove(self.cache_dir) |
| + self._lru = lru.LRUDict() |
| + self._save() # create state.json |
| else: |
| # Load state of the cache. |
| try: |
| @@ -1683,11 +1725,11 @@ class DiskCache(LocalCache): |
| logging.error('Failed to load cache state: %s' % (err,)) |
| # Don't want to keep broken state file. |
| file_path.try_remove(self.state_file) |
| - self._trim() |
| + self._trim() |
| # We want the initial cache size after trimming, i.e. what is readily |
| # available. |
| self._initial_number_items = len(self._lru) |
| - self._initial_size = sum(self._lru.itervalues()) |
| + self._initial_size = self._cache_disk_size() |
| if self._evicted: |
| logging.info( |
| 'Trimming evicted items with the following sizes: %s', |
| @@ -1701,6 +1743,11 @@ class DiskCache(LocalCache): |
| if fs.isdir(d): |
| # Necessary otherwise the file can't be created. |
| file_path.set_read_only(d, False) |
| + |
| + if fs.isfile(self.version_file): |
| + file_path.set_read_only(self.version_file, False) |
| + file_write(self.version_file, [str(self.VERSION)]) |
| + |
| if fs.isfile(self.state_file): |
| file_path.set_read_only(self.state_file, False) |
| self._lru.save(self.state_file) |
| @@ -1711,7 +1758,7 @@ class DiskCache(LocalCache): |
| # Ensure maximum cache size. |
| if self.policies.max_cache_size: |
| - total_size = sum(self._lru.itervalues()) |
| + total_size = self._cache_disk_size() |
| while total_size > self.policies.max_cache_size: |
| total_size -= self._remove_lru_file(True) |
| @@ -1731,7 +1778,7 @@ class DiskCache(LocalCache): |
| self._remove_lru_file(True) |
| if trimmed_due_to_space: |
| - total_usage = sum(self._lru.itervalues()) |
| + total_usage = self._cache_disk_size() |
| usage_percent = 0. |
| if total_usage: |
| usage_percent = 100. * float(total_usage) / self.policies.max_cache_size |
| @@ -1748,29 +1795,35 @@ class DiskCache(LocalCache): |
| def _path(self, digest): |
| """Returns the path to one item.""" |
| - return os.path.join(self.cache_dir, digest) |
| + assert len(digest) > 2 |
| + return os.path.join(self.cache_dir, digest[:2], digest[2:]) |
| def _remove_lru_file(self, allow_protected): |
| """Removes the least recently used file and returns its size.""" |
| self._lock.assert_locked() |
| + |
| try: |
| - digest, size = self._lru.get_oldest() |
| + digest, _ = self._lru.get_oldest() |
| if not allow_protected and digest == self._protected: |
| raise Error('Not enough space to map the whole isolated tree') |
| except KeyError: |
| raise Error('Nothing to remove') |
| - digest, size = self._lru.pop_oldest() |
| + |
| + digest, (size, _) = self._lru.pop_oldest() |
| logging.debug("Removing LRU file %s", digest) |
| self._delete_file(digest, size) |
| return size |
| def _add(self, digest, size=UNKNOWN_FILE_SIZE): |
| - """Adds an item into LRU cache marking it as a newest one.""" |
| + """Adds an item into LRU cache marking it as a newest one. |
| + |
| + Assumes the file exists. |
| + """ |
| self._lock.assert_locked() |
| if size == UNKNOWN_FILE_SIZE: |
| size = fs.stat(self._path(digest)).st_size |
| self._added.append(size) |
| - self._lru.add(digest, size) |
| + self._lru.add(digest, [size, self.time_fn()]) |
| self._free_disk -= size |
| # Do a quicker version of self._trim(). It only enforces free disk space, |
| # not cache size limits. It doesn't actually look at real free disk space, |
| @@ -1790,9 +1843,14 @@ class DiskCache(LocalCache): |
| try: |
| if size == UNKNOWN_FILE_SIZE: |
| size = fs.stat(self._path(digest)).st_size |
| - file_path.try_remove(self._path(digest)) |
| + path = self._path(digest) |
| + file_path.try_remove(path) |
| self._evicted.append(size) |
| self._free_disk += size |
| + |
| + dir = os.path.dirname(path) |
| + if len(os.listdir(dir)) == 0: |
| + fs.rmtree(dir) |
| except OSError as e: |
| logging.error('Error attempting to delete a file %s:\n%s' % (digest, e)) |