OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
import logging
import posixpath
import sys

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import All, Future
from path_util import AssertIsDirectory, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize
12 | 12 |
13 | 13 |
| 14 |
class CacheMissError(Exception):
  '''Raised when data is not found in a CachingFileSystem which is not allowed
  to reference its backing FileSystem.
  '''
  def __init__(self, message):
    # |message| is mandatory and becomes the exception's only argument, so
    # str(error) returns it unchanged.
    Exception.__init__(self, message)
| 20 |
| 21 |
| 22 |
14 class CachingFileSystem(FileSystem): | 23 class CachingFileSystem(FileSystem): |
15 '''FileSystem which implements a caching layer on top of |file_system|. It's | 24 '''FileSystem which implements a caching layer on top of |file_system|. If |
16 smart, using Stat() to decided whether to skip Read()ing from |file_system|, | 25 |fail_on_miss| is True then cache misses throw a CacheMissError rather than |
17 and only Stat()ing directories never files. | 26 falling back onto the underlying FileSystem. |
| 27 |
| 28 If |empty_stat_cache| is True (default), its stat cache is initialized empty. |
| 29 This should be set to False when wrapping a FileSystem that makes a proper |
| 30 distinction between stable identity and unstable identity. |
| 31 |
| 32 The working assumption is that a file system's unstable identity changes any |
| 33 time any contents of the file system change, and therefore a stat cache keyed |
| 34 on unstable identity will never need to be refreshed. |
18 ''' | 35 ''' |
def __init__(self,
             file_system,
             object_store_creator,
             fail_on_miss=False,
             empty_stat_cache=True):
  '''Sets up the stat, read and walk caches layered over |file_system|.

  |file_system| is the FileSystem being wrapped. The result of its
  GetStableIdentity() or GetUnstableIdentity() is used as the prefix of the
  category of every object store created here.

  |object_store_creator| builds the stores; it is invoked as
  Create(CachingFileSystem, category=..., **optargs).

  |fail_on_miss| is stored as-is and consulted when a stat lookup misses the
  cache.

  |empty_stat_cache| affects the stat store only: True keys it on the stable
  identity and starts it empty; False keys it on the unstable identity and
  lets it start populated.
  '''
  self._file_system = file_system
  self._fail_on_miss = fail_on_miss

  def create_object_store(category, use_stable_identity=True, **optargs):
    # The store's category is '<identity>/<category>', where <identity> is
    # the wrapped file system's stable or unstable identity.
    if use_stable_identity:
      identity = file_system.GetStableIdentity()
    else:
      identity = file_system.GetUnstableIdentity()
    return object_store_creator.Create(
        CachingFileSystem,
        category='%s/%s' % (identity, category),
        **optargs)

  # By default the stat cache is keyed on the stable identity and starts
  # empty. With empty_stat_cache=False it is keyed on the unstable identity
  # instead and is allowed to start populated.
  self._stat_cache = create_object_store(
      'stat',
      use_stable_identity=empty_stat_cache,
      start_empty=empty_stat_cache)
  # The read caches can start populated (start_empty=False) because file
  # updates are picked up by the stat, so it doesn't need the force-refresh
  # which starting empty is designed for. Without this optimisation, cron
  # runs are extra slow.
  self._read_cache = create_object_store('read', start_empty=False)
  self._walk_cache = create_object_store('walk', start_empty=False)
33 | 63 |
def Refresh(self):
  '''Refreshes the wrapped file system.

  Delegates to |self._file_system|.Refresh() and returns whatever that call
  returns; this method itself does not touch the stat, read or walk caches.
  '''
  return self._file_system.Refresh()
36 | 66 |
(...skipping 12 matching lines...) Expand all Loading... |
49 ''' | 79 ''' |
50 if path == dir_path: | 80 if path == dir_path: |
51 return dir_stat | 81 return dir_stat |
52 # Was a file stat. Extract that file. | 82 # Was a file stat. Extract that file. |
53 file_version = dir_stat.child_versions.get(file_path) | 83 file_version = dir_stat.child_versions.get(file_path) |
54 if file_version is None: | 84 if file_version is None: |
55 raise FileNotFoundError('No stat found for %s in %s (found %s)' % | 85 raise FileNotFoundError('No stat found for %s in %s (found %s)' % |
56 (path, dir_path, dir_stat.child_versions)) | 86 (path, dir_path, dir_stat.child_versions)) |
57 return StatInfo(file_version) | 87 return StatInfo(file_version) |
58 | 88 |
| 89 def raise_cache_miss(path): |
| 90 raise FileNotFoundError('Got cache miss when trying to stat %s' % path) |
| 91 |
59 dir_stat = self._stat_cache.Get(dir_path).Get() | 92 dir_stat = self._stat_cache.Get(dir_path).Get() |
60 if dir_stat is not None: | 93 if dir_stat is not None: |
61 return Future(callback=lambda: make_stat_info(dir_stat)) | 94 return Future(callback=lambda: make_stat_info(dir_stat)) |
62 | 95 |
| 96 if self._fail_on_miss: |
| 97 logging.info('Bailing on stat cache miss for %s' % dir_path) |
| 98 return Future(callback=lambda: raise_cache_miss(dir_path)) |
| 99 |
63 def next(dir_stat): | 100 def next(dir_stat): |
64 assert dir_stat is not None # should have raised a FileNotFoundError | 101 assert dir_stat is not None # should have raised a FileNotFoundError |
65 # We only ever need to cache the dir stat. | 102 # We only ever need to cache the dir stat. |
66 self._stat_cache.Set(dir_path, dir_stat) | 103 self._stat_cache.Set(dir_path, dir_stat) |
67 return make_stat_info(dir_stat) | 104 return make_stat_info(dir_stat) |
68 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) | 105 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) |
69 | 106 |
@memoize
def _MemoizedStatAsyncFromFileSystem(self, dir_path):
  '''Returns the wrapped file system's StatAsync() result for |dir_path|.

  This is a simple wrapper to memoize Futures to directory stats, since
  StatAsync makes heavy use of it. Only directories are accepted (enforced by
  the assertion below) so that the memoized cache doesn't blow up.
  '''
  assert IsDirectory(dir_path)
  return self._file_system.StatAsync(dir_path)
78 | 115 |
79 def Read(self, paths, skip_not_found=False): | 116 def Read(self, paths, skip_not_found=False): |
80 '''Reads a list of files. If a file is cached and it is not out of | 117 '''Reads a list of files. If a file is cached and it is not out of |
81 date, it is returned. Otherwise, the file is retrieved from the file system. | 118 date, it is returned. Otherwise, the file is retrieved from the file system. |
82 ''' | 119 ''' |
83 # Files which aren't found are cached in the read object store as | 120 # Files which aren't found are cached in the read object store as |
84 # (path, None, None). This is to prevent re-reads of files we know | 121 # (path, None, None). This is to prevent re-reads of files we know |
85 # do not exist. | 122 # do not exist. |
86 cached_read_values = self._read_cache.GetMulti(paths).Get() | 123 cached_read_values = self._read_cache.GetMulti(paths).Get() |
87 cached_stat_values = self._stat_cache.GetMulti(paths).Get() | 124 cached_stat_info = self._stat_cache.GetMulti(paths).Get() |
88 | 125 |
89 # Populate a map of paths to Futures to their stat. They may have already | 126 # Populate a map of paths to Futures to their stat. They may have already |
90 # been cached in which case their Future will already have been constructed | 127 # been cached in which case their Future will already have been constructed |
91 # with a value. | 128 # with a value. |
92 stat_futures = {} | 129 stat_futures = {} |
93 | 130 |
94 def handle(error): | 131 def handle(error): |
95 if isinstance(error, FileNotFoundError): | 132 if isinstance(error, FileNotFoundError): |
96 return None | 133 return None |
97 raise error | 134 raise error |
98 | 135 |
99 for path in paths: | 136 for path in paths: |
100 stat_value = cached_stat_values.get(path) | 137 stat_info = cached_stat_info.get(path) |
101 if stat_value is None: | 138 if stat_info is None: |
102 stat_future = self.StatAsync(path) | 139 stat_future = self.StatAsync(path) |
103 if skip_not_found: | 140 if skip_not_found: |
104 stat_future = stat_future.Then(lambda x: x, handle) | 141 stat_future = stat_future.Then(lambda x: x, handle) |
105 else: | 142 else: |
106 stat_future = Future(value=stat_value) | 143 stat_future = Future(value=stat_info) |
107 stat_futures[path] = stat_future | 144 stat_futures[path] = stat_future |
108 | 145 |
109 # Filter only the cached data which is up to date by comparing to the latest | 146 # Filter only the cached data which is up to date by comparing to the latest |
110 # stat. The cached read data includes the cached version. Remove it for | 147 # stat. The cached read data includes the cached version. Remove it for |
111 # the result returned to callers. |version| == None implies a non-existent | 148 # the result returned to callers. |version| == None implies a non-existent |
112 # file, so skip it. | 149 # file, so skip it. |
113 up_to_date_data = dict( | 150 up_to_date_data = dict( |
114 (path, data) for path, (data, version) in cached_read_values.iteritems() | 151 (path, data) for path, (data, version) in cached_read_values.iteritems() |
115 if version is not None and stat_futures[path].Get().version == version) | 152 if version is not None and stat_futures[path].Get().version == version) |
116 | 153 |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
162 for f in self.ReadSingle(root).Get(): | 199 for f in self.ReadSingle(root).Get(): |
163 if IsDirectory(f): | 200 if IsDirectory(f): |
164 dirs.append(f) | 201 dirs.append(f) |
165 else: | 202 else: |
166 files.append(f) | 203 files.append(f) |
167 # Update the cache. This is a root -> (dirs, files, version) mapping. | 204 # Update the cache. This is a root -> (dirs, files, version) mapping. |
168 self._walk_cache.Set(root, (dirs, files, root_stat.version)) | 205 self._walk_cache.Set(root, (dirs, files, root_stat.version)) |
169 return dirs, files | 206 return dirs, files |
170 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) | 207 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) |
171 | 208 |
172 def GetCommitID(self): | |
173 return self._file_system.GetCommitID() | |
174 | |
175 def GetPreviousCommitID(self): | |
176 return self._file_system.GetPreviousCommitID() | |
177 | |
def GetIdentity(self):
  '''Returns the identity reported by the wrapped file system, unchanged.'''
  # NOTE(review): the constructor in this change namespaces its object stores
  # with GetStableIdentity() / GetUnstableIdentity() rather than
  # GetIdentity() - confirm whether this accessor is still the intended API.
  return self._file_system.GetIdentity()
180 | 211 |
def __repr__(self):
  '''Returns '<class name> of <<repr of the wrapped file system>>'.

  The class name is taken from type(self), so subclasses report their own
  name.
  '''
  return '%s of <%s>' % (type(self).__name__, repr(self._file_system))
OLD | NEW |