Chromium Code Reviews
| Index: client/isolateserver.py |
| diff --git a/client/isolateserver.py b/client/isolateserver.py |
| index 8a8bed6763c2ba98dbe89e007a77131fd99fedd8..b41ea3f2fa1c9457b2fa555d67b69595290b7c9a 100755 |
| --- a/client/isolateserver.py |
| +++ b/client/isolateserver.py |
| @@ -5,11 +5,13 @@ |
| """Archives a set of files or directories to an Isolate Server.""" |
| -__version__ = '0.6.0' |
| +__version__ = '0.7.0' |
| import base64 |
| +import collections |
| import errno |
| import functools |
| import io |
| +import json |
| import logging |
| import optparse |
| @@ -32,7 +34,6 @@ from libs import arfile |
| from utils import file_path |
| from utils import fs |
| from utils import logging_utils |
| -from utils import lru |
| from utils import net |
| from utils import on_error |
| from utils import subprocess42 |
| @@ -1481,28 +1482,35 @@ class CachePolicies(object): |
| class DiskCache(LocalCache): |
| - """Stateful LRU cache in a flat hash table in a directory. |
| + """Stateful LRU cache in a semi-flat hash table in a directory. |
| Saves its state as json file. |
| """ |
| STATE_FILE = u'state.json' |
| - def __init__(self, cache_dir, policies, hash_algo): |
| + # All protected methods (starting with '_') except _path should be called |
| + # with self._lock held. |
| + |
| + def __init__(self, cache_dir, policies, hash_algo, time_fn=None): |
| """ |
| Arguments: |
| cache_dir: directory where to place the cache. |
| policies: cache retention policies. |
| algo: hashing algorithm used. |
| + time_fn: function to take current timestamp when adding new items. |
| + Defaults to time.time. |
| """ |
| - # All protected methods (starting with '_') except _path should be called |
| - # with self._lock held. |
| super(DiskCache, self).__init__() |
| + self.time_fn = time_fn or time.time |
| self.cache_dir = cache_dir |
| self.policies = policies |
| self.hash_algo = hash_algo |
| self.state_file = os.path.join(cache_dir, self.STATE_FILE) |
| - # Items in a LRU lookup dict(digest: size). |
| - self._lru = lru.LRUDict() |
| + # Items in a LRU lookup dict(digest: [size, timestamp]). |
| + # We use lists instead of tuples because JSON arrays are parsed to lists. |
| + # Mutations must be followed by `self._dirty = True`. |
| + self._lru = collections.OrderedDict() |
| + self._dirty = False # if True, self._lru was modified since loading/saving. |
| # Current cached free disk space. It is updated by self._trim(). |
| self._free_disk = 0 |
| # The first item in the LRU cache that must not be evicted during this run |
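As an aside, a minimal sketch of the bookkeeping this hunk switches to: a collections.OrderedDict keyed by digest with [size, timestamp] values, where pop-and-reinsert doubles as the LRU "touch" and popitem(last=False) yields the oldest entry. The names and values below are illustrative only, not taken from the CL.

import collections
import time

lru = collections.OrderedDict()  # digest -> [size, timestamp], oldest first

def touch(digest, size, time_fn=time.time):
  # Popping and re-inserting moves an existing digest to the newest end.
  lru.pop(digest, None)
  lru[digest] = [size, time_fn()]

touch('aa01', 1024)
touch('bb02', 2048)
touch('aa01', 1024)                          # 'bb02' is now the oldest entry
digest, (size, _) = lru.popitem(last=False)  # evicts 'bb02'; size == 2048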
| @@ -1512,9 +1520,11 @@ class DiskCache(LocalCache): |
| self._protected = None |
| # Cleanup operations done by self._load(), if any. |
| self._operations = [] |
| + |
| + self._free_disk = file_path.get_free_space(self.cache_dir) |
| + |
| with tools.Profiler('Setup'): |
| with self._lock: |
| - # self._load() calls self._trim() which initializes self._free_disk. |
| self._load() |
| def __contains__(self, digest): |
| @@ -1535,7 +1545,7 @@ class DiskCache(LocalCache): |
| logging.info( |
| '%5d (%8dkb) current', |
| len(self._lru), |
| - sum(self._lru.itervalues()) / 1024) |
| + self._cache_disk_size() / 1024) |
| logging.info( |
| '%5d (%8dkb) evicted', |
| len(self._evicted), sum(self._evicted) / 1024) |
| @@ -1544,9 +1554,15 @@ class DiskCache(LocalCache): |
| self._free_disk / 1024) |
| return False |
| + def _sizes(self): |
| + """Returns an iterator of pairs (digest, size).""" |
| + return ( |
| + (digest, size) |
| + for digest, (size, _) in self._lru.iteritems()) |
| + |
| def cached_set(self): |
| with self._lock: |
| - return self._lru.keys_set() |
| + return set(self._lru) |
| def cleanup(self): |
| """Cleans up the cache directory. |
| @@ -1557,36 +1573,44 @@ class DiskCache(LocalCache): |
| At that point, the cache was already loaded, trimmed to respect cache |
| policies. |
| """ |
| - fs.chmod(self.cache_dir, 0700) |
| - # Ensure that all files listed in the state still exist and add new ones. |
| - previous = self._lru.keys_set() |
| - # It'd be faster if there were a readdir() function. |
| - for filename in fs.listdir(self.cache_dir): |
| - if filename == self.STATE_FILE: |
| - fs.chmod(os.path.join(self.cache_dir, filename), 0600) |
| - continue |
| - if filename in previous: |
| - fs.chmod(os.path.join(self.cache_dir, filename), 0400) |
| - previous.remove(filename) |
| - continue |
| - |
| - # An untracked file. Delete it. |
| + def try_remove(filename): |
| + assert os.path.isabs(filename) |
| logging.warning('Removing unknown file %s from cache', filename) |
| - p = self._path(filename) |
| - if fs.isdir(p): |
| + if fs.isdir(filename): |
| try: |
| - file_path.rmtree(p) |
| + file_path.rmtree(filename) |
| except OSError: |
| pass |
| else: |
| - file_path.try_remove(p) |
| - continue |
| + file_path.try_remove(filename) |
| + |
| + fs.chmod(self.cache_dir, 0700) |
| + # Ensure that all files listed in the state still exist and add new ones. |
| + previous = set(self._lru) |
|
M-A Ruel
2016/10/13 20:51:12
self.cached_set() ? just for the form. :)
|
| + # It'd be faster if there were a readdir() function. |
| + for filename_l1 in fs.listdir(self.cache_dir): |
| + full_name_l1 = os.path.join(self.cache_dir, filename_l1) |
| + if filename_l1 == self.STATE_FILE: |
| + fs.chmod(full_name_l1, 0600) |
| + continue |
| + if len(filename_l1) != 2: |
| + try_remove(full_name_l1) |
| + continue |
| + for filename_l2 in fs.listdir(full_name_l1): |
| + digest = filename_l1 + filename_l2 |
| + full_name_l2 = os.path.join(full_name_l1, filename_l2) |
| + if digest in previous: |
| + fs.chmod(full_name_l2, 0400) |
| + previous.remove(digest) |
| + else: |
| + try_remove(full_name_l2) |
| if previous: |
| # Filter out entries that were not found. |
| logging.warning('Removed %d lost files', len(previous)) |
| - for filename in previous: |
| - self._lru.pop(filename) |
| + for digest in previous: |
| + self._lru.pop(digest) |
| + self._dirty = True |
| # What remains to be done is to hash every single item to |
| # detect corruption, then save to ensure state.json is up to date. |
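To make the new on-disk shape concrete, here is a rough sketch (hypothetical directory name, not the CL's code) of how cleanup() now reassembles digests from the semi-flat layout, where each first-level directory is the first two characters of the digest:

import os

cache_dir = '/tmp/isolate-cache'   # hypothetical location
tracked = set()
for level1 in os.listdir(cache_dir):
  full1 = os.path.join(cache_dir, level1)
  if level1 == 'state.json' or len(level1) != 2 or not os.path.isdir(full1):
    continue                       # the real code removes unexpected entries
  for level2 in os.listdir(full1):
    tracked.add(level1 + level2)   # 'ab' + 'cdef...' -> digest 'abcdef...'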
| @@ -1616,9 +1640,10 @@ class DiskCache(LocalCache): |
| # Update its LRU position. |
| with self._lock: |
| - if digest not in self._lru: |
| + if self._lru.pop(digest, None) is None: |
| return False |
| - self._lru.touch(digest) |
| + self._lru[digest] = [size, self.time_fn()] |
| + self._dirty = True |
| self._protected = self._protected or digest |
| return True |
| @@ -1626,14 +1651,15 @@ class DiskCache(LocalCache): |
| with self._lock: |
| # Do not check for 'digest == self._protected' since it could be because |
| # the object is corrupted. |
| - self._lru.pop(digest) |
| + self._lru.pop(digest, None) |
| + self._dirty = True |
| self._delete_file(digest, UNKNOWN_FILE_SIZE) |
| def getfileobj(self, digest): |
| try: |
| f = fs.open(self._path(digest), 'rb') |
| with self._lock: |
| - self._used.append(self._lru[digest]) |
| + self._used.append(self._lru[digest][0]) |
| return f |
| except IOError: |
| raise CacheMiss(digest) |
| @@ -1665,10 +1691,46 @@ class DiskCache(LocalCache): |
| self._add(digest, size) |
| return digest |
| + def _cache_disk_size(self): |
|
M-A Ruel
2016/10/13 20:51:12
can you move it aside cache_set? I feel it'd make
|
| + """Returns number of bytes that cache files take.""" |
| + return sum(size for (size, _) in self._lru.itervalues()) |
| + |
| + @staticmethod |
| + def _validate_state(state): |
| + """Raises ValueError if the given serialized state is invalid.""" |
| + if not isinstance(state, dict): |
| + raise ValueError('not a JSON object') |
| + |
| + version = state.get('version') |
| + if not isinstance(version, int): |
| + raise ValueError('version %r is not an integer' % (version,)) |
| + |
| + entries = state.get('entries') |
| + if not isinstance(entries, list): |
| + raise ValueError('entries is not an array') |
| + for entry in entries: |
| + try: |
| + if not isinstance(entry, list) or len(entry) != 2: |
| + raise ValueError('not a pair') |
| + if not isinstance(entry[0], basestring): |
| + raise ValueError('first item is not a string') |
| + |
| + pair_of_numbers = ( |
| + isinstance(entry[1], list) and |
| + len(entry[1]) == 2 and |
| + all(isinstance(x, (int, float)) for x in entry[1]) |
| + ) |
| + if not pair_of_numbers: |
| + raise ValueError('second item is not a pair of numbers') |
| + except ValueError as ex: |
| + raise ValueError('invalid entry %r: %s' % (entry, ex)) |
| + |
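For reference, a hand-written example (my assumption of the intended shape, matching the checks above, not taken from the CL) of a state document that _validate_state accepts: a JSON object with an integer version and entries as [digest, [size, timestamp]] pairs.

import json

state = {
  'version': 2,
  'entries': [
    ['0123abcd', [4096, 1476392472.0]],  # [digest, [size, timestamp]]
    ['4567ef89', [1024, 1476392475.5]],
  ],
}
# JSON round-trips arrays back to lists, which is why the entries use lists
# and why _load() can feed them straight into an OrderedDict.
assert json.loads(json.dumps(state)) == state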
| def _load(self): |
| - """Loads state of the cache from json file. |
| + """Loads state of the cache from json file if it exists. |
| If cache_dir does not exist on disk, it is created. |
| + |
| + Raises ValueError if the state file is broken. |
| """ |
| self._lock.assert_locked() |
| @@ -1678,32 +1740,54 @@ class DiskCache(LocalCache): |
| else: |
| # Load state of the cache. |
| try: |
| - self._lru = lru.LRUDict.load(self.state_file) |
| - except ValueError as err: |
| - logging.error('Failed to load cache state: %s' % (err,)) |
| - # Don't want to keep broken state file. |
| - file_path.try_remove(self.state_file) |
| + with open(self.state_file, 'r') as f: |
|
M-A Ruel
2016/10/13 20:51:12
'rb'
|
| + state = json.load(f) |
| + if not isinstance(state, dict): |
| + # Possibly old format. |
|
M-A Ruel
2016/10/13 20:51:12
# Possibly old format. Hard reset with a clear cache.
|
| + file_path.rmtree(self.cache_dir) |
| + fs.makedirs(self.cache_dir) |
| + self._lru = collections.OrderedDict() |
| + else: |
| + self._validate_state(state) |
| + if state['version'] != 2: |
| + raise ValueError( |
| + 'unsupported version %s, supported is 2' % (state['version'])) |
| + self._lru = collections.OrderedDict(state['entries']) |
| + self._dirty = False |
| + except (IOError, ValueError) as e: |
| + raise ValueError('Cannot load state file %s: %s' % (self.state_file, e)) |
| + |
| self._trim() |
| # We want the initial cache size after trimming, i.e. what is readily |
| # available. |
| self._initial_number_items = len(self._lru) |
| - self._initial_size = sum(self._lru.itervalues()) |
| + self._initial_size = self._cache_disk_size() |
| if self._evicted: |
| logging.info( |
| 'Trimming evicted items with the following sizes: %s', |
| sorted(self._evicted)) |
| def _save(self): |
| - """Saves the LRU ordering.""" |
| + """Saves the LRU ordering if changed.""" |
| self._lock.assert_locked() |
| + if not self._dirty: |
| + return |
| + |
| if sys.platform != 'win32': |
| d = os.path.dirname(self.state_file) |
| if fs.isdir(d): |
| # Necessary otherwise the file can't be created. |
| file_path.set_read_only(d, False) |
| + |
| if fs.isfile(self.state_file): |
| file_path.set_read_only(self.state_file, False) |
| - self._lru.save(self.state_file) |
| + with open(self.state_file, 'w') as f: |
|
M-A Ruel
2016/10/13 20:51:12
'wb'
|
| + contents = { |
| + 'version': 2, |
| + 'entries': self._lru.items(), |
| + } |
| + json.dump(contents, f) |
|
M-A Ruel
2016/10/13 20:51:12
separators=(',',':')
will save a few valuable bytes.
|
| + self._dirty = False |
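On the separators suggestion above: by default json.dump emits a space after every ',' and ':', so passing compact separators drops that padding. A small standalone illustration of the idea (not part of the CL as shown):

import json

contents = {'version': 2, 'entries': [['0123abcd', [4096, 1476392472.0]]]}
default_form = json.dumps(contents)
compact_form = json.dumps(contents, separators=(',', ':'))
# len(compact_form) < len(default_form): the compact form omits the spaces
# that the default (', ', ': ') separators insert.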
| def _trim(self): |
| """Trims anything we don't know, make sure enough free space exists.""" |
| @@ -1711,7 +1795,7 @@ class DiskCache(LocalCache): |
| # Ensure maximum cache size. |
| if self.policies.max_cache_size: |
| - total_size = sum(self._lru.itervalues()) |
| + total_size = self._cache_disk_size() |
| while total_size > self.policies.max_cache_size: |
| total_size -= self._remove_lru_file(True) |
| @@ -1731,7 +1815,7 @@ class DiskCache(LocalCache): |
| self._remove_lru_file(True) |
| if trimmed_due_to_space: |
| - total_usage = sum(self._lru.itervalues()) |
| + total_usage = self._cache_disk_size() |
| usage_percent = 0. |
| if total_usage: |
| usage_percent = 100. * float(total_usage) / self.policies.max_cache_size |
| @@ -1748,29 +1832,41 @@ class DiskCache(LocalCache): |
| def _path(self, digest): |
| """Returns the path to one item.""" |
| - return os.path.join(self.cache_dir, digest) |
| + assert len(digest) > 2 |
| + return os.path.join(self.cache_dir, digest[:2], digest[2:]) |
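A quick illustration of the mapping _path now produces (the digest below is shortened for readability; real digests are full hash hex strings):

import os

cache_dir = 'cache'            # hypothetical
digest = 'deadbeef0123'        # shortened example digest
path = os.path.join(cache_dir, digest[:2], digest[2:])
# On POSIX this yields 'cache/de/adbeef0123': items sharing the first two
# hex characters of their digest share a first-level directory.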
| def _remove_lru_file(self, allow_protected): |
| """Removes the lastest recently used file and returns its size.""" |
|
M-A Ruel
2016/10/13 20:51:12
Probably a typo of mine; latest ?
|
| self._lock.assert_locked() |
| - try: |
| - digest, size = self._lru.get_oldest() |
| - if not allow_protected and digest == self._protected: |
| - raise Error('Not enough space to map the whole isolated tree') |
| - except KeyError: |
| + |
| + digest = None |
| + for key in self._lru: |
| + digest = key |
| + break |
| + if digest is None: |
| raise Error('Nothing to remove') |
| - digest, size = self._lru.pop_oldest() |
| + |
| + if not allow_protected and digest == self._protected: |
| + raise Error('Not enough space to map the whole isolated tree') |
| + |
| + digest, (size, _) = self._lru.popitem(last=False) |
| + self._dirty = True |
| logging.debug("Removing LRU file %s", digest) |
| self._delete_file(digest, size) |
| return size |
| def _add(self, digest, size=UNKNOWN_FILE_SIZE): |
| - """Adds an item into LRU cache marking it as a newest one.""" |
| + """Adds an item into LRU cache marking it as a newest one. |
| + |
| + Asumes the file exists. |
|
M-A Ruel
2016/10/13 20:51:12
Assumes
|
| + """ |
| self._lock.assert_locked() |
| if size == UNKNOWN_FILE_SIZE: |
| size = fs.stat(self._path(digest)).st_size |
| self._added.append(size) |
| - self._lru.add(digest, size) |
| + self._lru.pop(digest, None) |
| + self._lru[digest] = [size, self.time_fn()] |
|
M-A Ruel
2016/10/13 20:51:12
self._lru[digest] = (size, self.time_fn())
it's s
|
| + self._dirty = True |
| self._free_disk -= size |
| # Do a quicker version of self._trim(). It only enforces free disk space, |
| # not cache size limits. It doesn't actually look at real free disk space, |
| @@ -1790,9 +1886,14 @@ class DiskCache(LocalCache): |
| try: |
| if size == UNKNOWN_FILE_SIZE: |
| size = fs.stat(self._path(digest)).st_size |
| - file_path.try_remove(self._path(digest)) |
| + path = self._path(digest) |
| + file_path.try_remove(path) |
| self._evicted.append(size) |
| self._free_disk += size |
| + |
| + parent = os.path.dirname(path) |
| + if len(os.listdir(parent)) == 0: |
|
M-A Ruel
2016/10/13 20:51:12
I don't think it should be done. It's better to le
|
| + fs.rmtree(parent) |
|
M-A Ruel
2016/10/13 20:51:12
I don't see a corresponding call to mkdir() to cre
|
| except OSError as e: |
| logging.error('Error attempting to delete a file %s:\n%s' % (digest, e)) |