OLD | NEW |
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | 5 import logging |
6 import posixpath | 6 import posixpath |
7 import sys | 7 import sys |
8 | 8 |
9 from file_system import FileSystem, StatInfo, FileNotFoundError | 9 from file_system import FileSystem, StatInfo, FileNotFoundError |
10 from future import All, Future | 10 from future import All, Future |
11 from path_util import AssertIsDirectory, IsDirectory, ToDirectory | 11 from path_util import AssertIsDirectory, IsDirectory, ToDirectory |
12 from third_party.json_schema_compiler.memoize import memoize | 12 from third_party.json_schema_compiler.memoize import memoize |
13 | 13 |
14 | 14 |
15 class CachingFileSystem(FileSystem): | 15 class CachingFileSystem(FileSystem): |
16 '''FileSystem which implements a caching layer on top of |file_system|. If | 16 '''FileSystem which implements a caching layer on top of |file_system|. If |
17 |fail_on_miss| is True then cache misses throw a FileNotFoundError rather than | 17 |fail_on_miss| is True then cache misses throw a FileNotFoundError rather than |
18 falling back onto the underlying FileSystem. | 18 falling back onto the underlying FileSystem. |
19 | |
20 If the underlying FileSystem is versioned (i.e., it implements GetVersion to | |
21 return something other than None), this will create a persistent stat cache | |
22 (keyed on the FileSystem instance's version) as an additional optimization. | |
23 ''' | 19 ''' |
24 def __init__(self, file_system, object_store_creator, fail_on_miss=False): | 20 def __init__(self, file_system, object_store_creator, fail_on_miss=False): |
25 self._file_system = file_system | 21 self._file_system = file_system |
26 self._fail_on_miss = fail_on_miss | 22 self._fail_on_miss = fail_on_miss |
27 def create_object_store(category, try_versioning=False, **optargs): | 23 def create_object_store(category, start_empty=True): |
28 version = file_system.GetVersion() | |
29 versioned = try_versioning and version is not None | |
30 if versioned: | |
31 identity = '%s/%s' % (file_system.GetIdentity(), version) | |
32 else: | |
33 identity = file_system.GetIdentity() | |
34 optargs['start_empty'] = optargs.get('start_empty', not versioned) | |
35 return object_store_creator.Create( | 24 return object_store_creator.Create( |
36 CachingFileSystem, | 25 CachingFileSystem, |
37 category='%s/%s' % (identity, category), | 26 category='%s/%s' % (file_system.GetIdentity(), category), |
38 **optargs) | 27 start_empty=start_empty) |
39 self._stat_cache = create_object_store('stat', try_versioning=True) | 28 # We only start the stat cache empty if |fail_on_miss| is False, i.e. if |
40 # The read caches can start populated (start_empty=False) because file | 29 # we're NOT running on a live instance and we can afford to fall back onto |
41 # updates are picked up by the stat, so it doesn't need the force-refresh | 30 # the underlying FileSystem impl. |
42 # which starting empty is designed for. Without this optimisation, cron | 31 self._stat_cache = create_object_store('stat', start_empty=not fail_on_miss) |
43 # runs are extra slow. | |
44 self._read_cache = create_object_store('read', start_empty=False) | 32 self._read_cache = create_object_store('read', start_empty=False) |
45 self._walk_cache = create_object_store('walk', start_empty=False) | 33 self._walk_cache = create_object_store('walk', start_empty=False) |
46 | 34 |
47 def Refresh(self): | 35 def Refresh(self): |
48 return self._file_system.Refresh() | 36 return self._file_system.Refresh() |
49 | 37 |
50 def StatAsync(self, path): | 38 def StatAsync(self, path): |
51 '''Stats the directory given, or if a file is given, stats the file's parent | 39 '''Stats the directory given, or if a file is given, stats the file's parent |
52 directory to get info about the file. | 40 directory to get info about the file. |
53 ''' | 41 ''' |
(...skipping 16 matching lines...) Expand all Loading... |
70 return StatInfo(file_version) | 58 return StatInfo(file_version) |
71 | 59 |
72 def raise_cache_miss(path): | 60 def raise_cache_miss(path): |
73 raise FileNotFoundError('Got cache miss when trying to stat %s' % path) | 61 raise FileNotFoundError('Got cache miss when trying to stat %s' % path) |
74 | 62 |
75 dir_stat = self._stat_cache.Get(dir_path).Get() | 63 dir_stat = self._stat_cache.Get(dir_path).Get() |
76 if dir_stat is not None: | 64 if dir_stat is not None: |
77 return Future(callback=lambda: make_stat_info(dir_stat)) | 65 return Future(callback=lambda: make_stat_info(dir_stat)) |
78 | 66 |
79 if self._fail_on_miss: | 67 if self._fail_on_miss: |
80 logging.warning('Bailing on stat cache miss for %s' % dir_path) | 68 logging.warning('Bailing on stat cache miss for %s on %s' % |
| 69 (dir_path, self.GetIdentity())) |
81 return Future(callback=lambda: raise_cache_miss(dir_path)) | 70 return Future(callback=lambda: raise_cache_miss(dir_path)) |
82 | 71 |
83 def next(dir_stat): | 72 def next(dir_stat): |
84 assert dir_stat is not None # should have raised a FileNotFoundError | 73 assert dir_stat is not None # should have raised a FileNotFoundError |
85 # We only ever need to cache the dir stat. | 74 # We only ever need to cache the dir stat. |
86 self._stat_cache.Set(dir_path, dir_stat) | 75 self._stat_cache.Set(dir_path, dir_stat) |
87 return make_stat_info(dir_stat) | 76 return make_stat_info(dir_stat) |
88 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) | 77 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) |
89 | 78 |
90 @memoize | 79 @memoize |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
134 (path, data) for path, (data, version) in cached_read_values.iteritems() | 123 (path, data) for path, (data, version) in cached_read_values.iteritems() |
135 if version is not None and stat_futures[path].Get().version == version) | 124 if version is not None and stat_futures[path].Get().version == version) |
136 | 125 |
137 if skip_not_found: | 126 if skip_not_found: |
138 # Filter out paths which we know do not exist, i.e. if |path| is in | 127 # Filter out paths which we know do not exist, i.e. if |path| is in |
139 # |cached_read_values| *and* has a None version, then it doesn't exist. | 128 # |cached_read_values| *and* has a None version, then it doesn't exist. |
140 # See the above declaration of |cached_read_values| for more information. | 129 # See the above declaration of |cached_read_values| for more information. |
141 paths = [path for path in paths | 130 paths = [path for path in paths |
142 if cached_read_values.get(path, (None, True))[1]] | 131 if cached_read_values.get(path, (None, True))[1]] |
143 | 132 |
144 if len(up_to_date_data) == len(paths): | 133 remaining_paths = set(paths) - set(up_to_date_data.iterkeys()) |
| 134 if len(remaining_paths) == 0: |
145 # Everything was cached and up-to-date. | 135 # Everything was cached and up-to-date. |
146 return Future(value=up_to_date_data) | 136 return Future(value=up_to_date_data) |
147 | 137 |
| 138 def raise_cache_miss(paths): |
| 139 raise FileNotFoundError('Got cache miss when trying to stat %s' % paths) |
| 140 |
| 141 if self._fail_on_miss: |
| 142 # Don't fall back onto the underlying FileSystem: log the miss and return a Future that raises FileNotFoundError. |
| 143 logging.warn('Read cache miss for %s on %s' % |
| 144 (remaining_paths, self.GetIdentity())) |
| 145 return Future(callback=lambda: raise_cache_miss(remaining_paths)) |
| 146 |
148 def next(new_results): | 147 def next(new_results): |
149 # Update the cache. This is a path -> (data, version) mapping. | 148 # Update the cache. This is a path -> (data, version) mapping. |
150 self._read_cache.SetMulti( | 149 self._read_cache.SetMulti( |
151 dict((path, (new_result, stat_futures[path].Get().version)) | 150 dict((path, (new_result, stat_futures[path].Get().version)) |
152 for path, new_result in new_results.iteritems())) | 151 for path, new_result in new_results.iteritems())) |
153 # Update the read cache to include files that weren't found, to prevent | 152 # Update the read cache to include files that weren't found, to prevent |
154 # constantly trying to read a file we now know doesn't exist. | 153 # constantly trying to read a file we now know doesn't exist. |
155 self._read_cache.SetMulti( | 154 self._read_cache.SetMulti( |
156 dict((path, (None, None)) for path in paths | 155 dict((path, (None, None)) for path in paths |
157 if stat_futures[path].Get() is None)) | 156 if stat_futures[path].Get() is None)) |
158 new_results.update(up_to_date_data) | 157 new_results.update(up_to_date_data) |
159 return new_results | 158 return new_results |
| 159 |
160 # Read in the values that were uncached or old. | 160 # Read in the values that were uncached or old. |
161 return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()), | 161 return self._file_system.Read(remaining_paths, |
162 skip_not_found=skip_not_found).Then(next) | 162 skip_not_found=skip_not_found).Then(next) |
163 | 163 |
164 def GetCommitID(self): | 164 def GetCommitID(self): |
165 return self._file_system.GetCommitID() | 165 return self._file_system.GetCommitID() |
166 | 166 |
167 def GetPreviousCommitID(self): | 167 def GetPreviousCommitID(self): |
168 return self._file_system.GetPreviousCommitID() | 168 return self._file_system.GetPreviousCommitID() |
169 | 169 |
170 def Walk(self, root, depth=-1): | 170 def Walk(self, root, depth=-1): |
171 '''Overrides FileSystem.Walk() to provide caching functionality. | 171 '''Overrides FileSystem.Walk() to provide caching functionality. |
(...skipping 18 matching lines...) Expand all Loading... |
190 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) | 190 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) |
191 | 191 |
192 def GetIdentity(self): | 192 def GetIdentity(self): |
193 return self._file_system.GetIdentity() | 193 return self._file_system.GetIdentity() |
194 | 194 |
195 def GetVersion(self): | 195 def GetVersion(self): |
196 return self._file_system.GetVersion() | 196 return self._file_system.GetVersion() |
197 | 197 |
198 def __repr__(self): | 198 def __repr__(self): |
199 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) | 199 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) |
OLD | NEW |