| OLD | NEW |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import posixpath | 5 import posixpath |
| 6 import sys | 6 import sys |
| 7 | 7 |
| 8 from file_system import FileSystem, StatInfo, FileNotFoundError | 8 from file_system import FileSystem, StatInfo, FileNotFoundError |
| 9 from future import All, Future | 9 from future import All, Future |
| 10 from path_util import AssertIsDirectory, IsDirectory, ToDirectory | 10 from path_util import AssertIsDirectory, IsDirectory, ToDirectory |
| 11 from third_party.json_schema_compiler.memoize import memoize | 11 from third_party.json_schema_compiler.memoize import memoize |
| 12 | 12 |
| 13 | 13 |
| 14 |
| 15 class CacheMissError(Exception): |
| 16 '''Raised when data is not found in a CachingFileSystem which is not allowed |
| 17 to reference its backing FileSystem.''' |
| 18 def __init__(self, message): |
| 19 Exception.__init__(self, message) |
| 20 |
| 21 |
| 22 |
| 14 class CachingFileSystem(FileSystem): | 23 class CachingFileSystem(FileSystem): |
| 15 '''FileSystem which implements a caching layer on top of |file_system|. It's | 24 '''FileSystem which implements a caching layer on top of |file_system|. If |
| 16 smart, using Stat() to decide whether to skip Read()ing from |file_system|, | 25 |fail_on_miss| is True then cache misses throw a CacheMissError rather than |
| 17 and only Stat()ing directories, never files. | 26 falling back onto the underlying FileSystem. |
| 27 |
| 28 If |empty_stat_cache| is True (default), its stat cache is initialized empty. |
| 29 This should be set to False when wrapping a FileSystem that makes a proper |
| 30 distinction between stable identity and unstable identity. |
| 31 |
| 32 The working assumption is that a file system's unstable identity changes any |
| 33 time any contents of the file system change, and therefore a stat cache keyed |
| 34 on unstable identity will never need to be refreshed. |
| 18 ''' | 35 ''' |
| 19 def __init__(self, file_system, object_store_creator): | 36 def __init__(self, |
| 37 file_system, |
| 38 object_store_creator, |
| 39 fail_on_miss=False, |
| 40 empty_stat_cache=True): |
| 20 self._file_system = file_system | 41 self._file_system = file_system |
| 21 def create_object_store(category, **optargs): | 42 self._fail_on_miss = fail_on_miss |
| 43 def create_object_store(category, use_stable_identity=True, **optargs): |
| 44 if use_stable_identity: |
| 45 identity = file_system.GetStableIdentity() |
| 46 else: |
| 47 identity = file_system.GetUnstableIdentity() |
| 22 return object_store_creator.Create( | 48 return object_store_creator.Create( |
| 23 CachingFileSystem, | 49 CachingFileSystem, |
| 24 category='%s/%s' % (file_system.GetIdentity(), category), | 50 category='%s/%s' % (identity, category), |
| 25 **optargs) | 51 **optargs) |
| 26 self._stat_cache = create_object_store('stat') | 52 # The stable stat cache caches file stat info keyed by the file system's |
| 53 # stable identity (or unstable identity for persistent caches). |
| 54 self._stat_cache = create_object_store('stat', |
| 55 use_stable_identity=empty_stat_cache, |
| 56 start_empty=empty_stat_cache) |
| 27 # The read caches can start populated (start_empty=False) because file | 57 # The read caches can start populated (start_empty=False) because file |
| 28 # updates are picked up by the stat, so it doesn't need the force-refresh | 58 # updates are picked up by the stat, so it doesn't need the force-refresh |
| 29 # which starting empty is designed for. Without this optimisation, cron | 59 # which starting empty is designed for. Without this optimisation, cron |
| 30 # runs are extra slow. | 60 # runs are extra slow. |
| 31 self._read_cache = create_object_store('read', start_empty=False) | 61 self._read_cache = create_object_store('read', start_empty=False) |
| 32 self._walk_cache = create_object_store('walk', start_empty=False) | 62 self._walk_cache = create_object_store('walk', start_empty=False) |
| 33 | 63 |
| 34 def Refresh(self): | 64 def Refresh(self): |
| 35 return self._file_system.Refresh() | 65 return self._file_system.Refresh() |
| 36 | 66 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 49 ''' | 79 ''' |
| 50 if path == dir_path: | 80 if path == dir_path: |
| 51 return dir_stat | 81 return dir_stat |
| 52 # Was a file stat. Extract that file. | 82 # Was a file stat. Extract that file. |
| 53 file_version = dir_stat.child_versions.get(file_path) | 83 file_version = dir_stat.child_versions.get(file_path) |
| 54 if file_version is None: | 84 if file_version is None: |
| 55 raise FileNotFoundError('No stat found for %s in %s (found %s)' % | 85 raise FileNotFoundError('No stat found for %s in %s (found %s)' % |
| 56 (path, dir_path, dir_stat.child_versions)) | 86 (path, dir_path, dir_stat.child_versions)) |
| 57 return StatInfo(file_version) | 87 return StatInfo(file_version) |
| 58 | 88 |
| 89 def raise_cache_miss(path): |
| 90 raise FileNotFoundError('Got cache miss when trying to stat %s' % path) |
| 91 |
| 59 dir_stat = self._stat_cache.Get(dir_path).Get() | 92 dir_stat = self._stat_cache.Get(dir_path).Get() |
| 60 if dir_stat is not None: | 93 if dir_stat is not None: |
| 61 return Future(callback=lambda: make_stat_info(dir_stat)) | 94 return Future(callback=lambda: make_stat_info(dir_stat)) |
| 62 | 95 |
| 96 if self._fail_on_miss: |
| 97 logging.info('Bailing on stat cache miss for %s' % dir_path) |
| 98 return Future(callback=lambda: raise_cache_miss(dir_path)) |
| 99 |
| 63 def next(dir_stat): | 100 def next(dir_stat): |
| 64 assert dir_stat is not None # should have raised a FileNotFoundError | 101 assert dir_stat is not None # should have raised a FileNotFoundError |
| 65 # We only ever need to cache the dir stat. | 102 # We only ever need to cache the dir stat. |
| 66 self._stat_cache.Set(dir_path, dir_stat) | 103 self._stat_cache.Set(dir_path, dir_stat) |
| 67 return make_stat_info(dir_stat) | 104 return make_stat_info(dir_stat) |
| 68 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) | 105 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) |
| 69 | 106 |
| 70 @memoize | 107 @memoize |
| 71 def _MemoizedStatAsyncFromFileSystem(self, dir_path): | 108 def _MemoizedStatAsyncFromFileSystem(self, dir_path): |
| 72 '''This is a simple wrapper to memoize Futures to directory stats, since | 109 '''This is a simple wrapper to memoize Futures to directory stats, since |
| 73 StatAsync makes heavy use of it. Only cache directories so that the | 110 StatAsync makes heavy use of it. Only cache directories so that the |
| 74 memoized cache doesn't blow up. | 111 memoized cache doesn't blow up. |
| 75 ''' | 112 ''' |
| 76 assert IsDirectory(dir_path) | 113 assert IsDirectory(dir_path) |
| 77 return self._file_system.StatAsync(dir_path) | 114 return self._file_system.StatAsync(dir_path) |
| 78 | 115 |
| 79 def Read(self, paths, skip_not_found=False): | 116 def Read(self, paths, skip_not_found=False): |
| 80 '''Reads a list of files. If a file is cached and it is not out of | 117 '''Reads a list of files. If a file is cached and it is not out of |
| 81 date, it is returned. Otherwise, the file is retrieved from the file system. | 118 date, it is returned. Otherwise, the file is retrieved from the file system. |
| 82 ''' | 119 ''' |
| 83 # Files which aren't found are cached in the read object store as | 120 # Files which aren't found are cached in the read object store as |
| 84 # (path, None, None). This is to prevent re-reads of files we know | 121 # (path, None, None). This is to prevent re-reads of files we know |
| 85 # do not exist. | 122 # do not exist. |
| 86 cached_read_values = self._read_cache.GetMulti(paths).Get() | 123 cached_read_values = self._read_cache.GetMulti(paths).Get() |
| 87 cached_stat_values = self._stat_cache.GetMulti(paths).Get() | 124 cached_stat_info = self._stat_cache.GetMulti(paths).Get() |
| 88 | 125 |
| 89 # Populate a map of paths to Futures to their stat. They may have already | 126 # Populate a map of paths to Futures to their stat. They may have already |
| 90 # been cached in which case their Future will already have been constructed | 127 # been cached in which case their Future will already have been constructed |
| 91 # with a value. | 128 # with a value. |
| 92 stat_futures = {} | 129 stat_futures = {} |
| 93 | 130 |
| 94 def handle(error): | 131 def handle(error): |
| 95 if isinstance(error, FileNotFoundError): | 132 if isinstance(error, FileNotFoundError): |
| 96 return None | 133 return None |
| 97 raise error | 134 raise error |
| 98 | 135 |
| 99 for path in paths: | 136 for path in paths: |
| 100 stat_value = cached_stat_values.get(path) | 137 stat_info = cached_stat_info.get(path) |
| 101 if stat_value is None: | 138 if stat_info is None: |
| 102 stat_future = self.StatAsync(path) | 139 stat_future = self.StatAsync(path) |
| 103 if skip_not_found: | 140 if skip_not_found: |
| 104 stat_future = stat_future.Then(lambda x: x, handle) | 141 stat_future = stat_future.Then(lambda x: x, handle) |
| 105 else: | 142 else: |
| 106 stat_future = Future(value=stat_value) | 143 stat_future = Future(value=stat_info) |
| 107 stat_futures[path] = stat_future | 144 stat_futures[path] = stat_future |
| 108 | 145 |
| 109 # Filter only the cached data which is up to date by comparing to the latest | 146 # Filter only the cached data which is up to date by comparing to the latest |
| 110 # stat. The cached read data includes the cached version. Remove it for | 147 # stat. The cached read data includes the cached version. Remove it for |
| 111 # the result returned to callers. |version| == None implies a non-existent | 148 # the result returned to callers. |version| == None implies a non-existent |
| 112 # file, so skip it. | 149 # file, so skip it. |
| 113 up_to_date_data = dict( | 150 up_to_date_data = dict( |
| 114 (path, data) for path, (data, version) in cached_read_values.iteritems() | 151 (path, data) for path, (data, version) in cached_read_values.iteritems() |
| 115 if version is not None and stat_futures[path].Get().version == version) | 152 if version is not None and stat_futures[path].Get().version == version) |
| 116 | 153 |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 162 for f in self.ReadSingle(root).Get(): | 199 for f in self.ReadSingle(root).Get(): |
| 163 if IsDirectory(f): | 200 if IsDirectory(f): |
| 164 dirs.append(f) | 201 dirs.append(f) |
| 165 else: | 202 else: |
| 166 files.append(f) | 203 files.append(f) |
| 167 # Update the cache. This is a root -> (dirs, files, version) mapping. | 204 # Update the cache. This is a root -> (dirs, files, version) mapping. |
| 168 self._walk_cache.Set(root, (dirs, files, root_stat.version)) | 205 self._walk_cache.Set(root, (dirs, files, root_stat.version)) |
| 169 return dirs, files | 206 return dirs, files |
| 170 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) | 207 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) |
| 171 | 208 |
| 172 def GetCommitID(self): | |
| 173 return self._file_system.GetCommitID() | |
| 174 | |
| 175 def GetPreviousCommitID(self): | |
| 176 return self._file_system.GetPreviousCommitID() | |
| 177 | |
| 178 def GetIdentity(self): | 209 def GetIdentity(self): |
| 179 return self._file_system.GetIdentity() | 210 return self._file_system.GetIdentity() |
| 180 | 211 |
| 181 def __repr__(self): | 212 def __repr__(self): |
| 182 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) | 213 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) |
| OLD | NEW |