OLD | NEW |
---|---|
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | |
5 import posixpath | 6 import posixpath |
6 import sys | 7 import sys |
7 | 8 |
8 from file_system import FileSystem, StatInfo, FileNotFoundError | 9 from file_system import FileSystem, StatInfo, FileNotFoundError |
9 from future import All, Future | 10 from future import All, Future |
10 from path_util import AssertIsDirectory, IsDirectory, ToDirectory | 11 from path_util import AssertIsDirectory, IsDirectory, ToDirectory |
11 from third_party.json_schema_compiler.memoize import memoize | 12 from third_party.json_schema_compiler.memoize import memoize |
12 | 13 |
13 | 14 |
14 class CachingFileSystem(FileSystem): | 15 class CachingFileSystem(FileSystem): |
15 '''FileSystem which implements a caching layer on top of |file_system|. It's | 16 '''FileSystem which implements a caching layer on top of |file_system|. If |
16 smart, using Stat() to decided whether to skip Read()ing from |file_system|, | 17 |fail_on_miss| is True then cache misses throw a FileNotFoundError rather than |
17 and only Stat()ing directories never files. | 18 falling back onto the underlying FileSystem. |
19 | |
20 If the underlying FileSystem is versioned (i.e., it implements GetVersion to | |
21 return something other than None), this will create a persistent stat cache | |
22 (keyed on the FileSystem instance's version) as an additional optimization. | |
18 ''' | 23 ''' |
19 def __init__(self, file_system, object_store_creator): | 24 def __init__(self, file_system, object_store_creator, fail_on_miss=False): |
20 self._file_system = file_system | 25 self._file_system = file_system |
21 def create_object_store(category, **optargs): | 26 self._fail_on_miss = fail_on_miss |
27 def create_object_store(category, try_versioning=False, **optargs): | |
28 version = file_system.GetVersion() | |
not at google - send to devlin
2014/10/24 00:04:50
It seems like these changes should be reflected in
Ken Rockot(use gerrit already)
2014/10/24 21:26:53
Done. Added a test to verify caching behavior on v
| |
29 versioned = try_versioning and version is not None | |
not at google - send to devlin
2014/10/24 00:04:50
And do you need to update CompiledFileSystem at al
Ken Rockot(use gerrit already)
2014/10/24 21:26:53
I don't think so? This change only affects the cac
| |
30 if versioned: | |
31 identity = '%s/%s' % (file_system.GetIdentity(), version) | |
32 else: | |
33 identity = file_system.GetIdentity() | |
34 optargs['start_empty'] = optargs.get('start_empty', not versioned) | |
Ken Rockot(use gerrit already)
2014/10/23 22:36:16
This is gross. Is there a more clever way to expre
not at google - send to devlin
2014/10/24 00:04:50
Maybe only have a single flag like "controls_versi
Ken Rockot(use gerrit already)
2014/10/24 21:26:53
OK - Leaving as-is then.
| |
22 return object_store_creator.Create( | 35 return object_store_creator.Create( |
23 CachingFileSystem, | 36 CachingFileSystem, |
24 category='%s/%s' % (file_system.GetIdentity(), category), | 37 category='%s/%s' % (identity, category), |
25 **optargs) | 38 **optargs) |
26 self._stat_cache = create_object_store('stat') | 39 self._stat_cache = create_object_store('stat', try_versioning=True) |
27 # The read caches can start populated (start_empty=False) because file | 40 # The read caches can start populated (start_empty=False) because file |
28 # updates are picked up by the stat, so it doesn't need the force-refresh | 41 # updates are picked up by the stat, so it doesn't need the force-refresh |
29 # which starting empty is designed for. Without this optimisation, cron | 42 # which starting empty is designed for. Without this optimisation, cron |
30 # runs are extra slow. | 43 # runs are extra slow. |
31 self._read_cache = create_object_store('read', start_empty=False) | 44 self._read_cache = create_object_store('read', start_empty=False) |
32 self._walk_cache = create_object_store('walk', start_empty=False) | 45 self._walk_cache = create_object_store('walk', start_empty=False) |
33 | 46 |
34 def Refresh(self): | 47 def Refresh(self): |
35 return self._file_system.Refresh() | 48 return self._file_system.Refresh() |
36 | 49 |
(...skipping 12 matching lines...) | |
49 ''' | 62 ''' |
50 if path == dir_path: | 63 if path == dir_path: |
51 return dir_stat | 64 return dir_stat |
52 # Was a file stat. Extract that file. | 65 # Was a file stat. Extract that file. |
53 file_version = dir_stat.child_versions.get(file_path) | 66 file_version = dir_stat.child_versions.get(file_path) |
54 if file_version is None: | 67 if file_version is None: |
55 raise FileNotFoundError('No stat found for %s in %s (found %s)' % | 68 raise FileNotFoundError('No stat found for %s in %s (found %s)' % |
56 (path, dir_path, dir_stat.child_versions)) | 69 (path, dir_path, dir_stat.child_versions)) |
57 return StatInfo(file_version) | 70 return StatInfo(file_version) |
58 | 71 |
72 def raise_cache_miss(path): | |
73 raise FileNotFoundError('Got cache miss when trying to stat %s' % path) | |
74 | |
59 dir_stat = self._stat_cache.Get(dir_path).Get() | 75 dir_stat = self._stat_cache.Get(dir_path).Get() |
60 if dir_stat is not None: | 76 if dir_stat is not None: |
61 return Future(callback=lambda: make_stat_info(dir_stat)) | 77 return Future(callback=lambda: make_stat_info(dir_stat)) |
62 | 78 |
79 if self._fail_on_miss: | |
80 logging.warning('Bailing on stat cache miss for %s' % dir_path) | |
81 return Future(callback=lambda: raise_cache_miss(dir_path)) | |
82 | |
63 def next(dir_stat): | 83 def next(dir_stat): |
64 assert dir_stat is not None # should have raised a FileNotFoundError | 84 assert dir_stat is not None # should have raised a FileNotFoundError |
65 # We only ever need to cache the dir stat. | 85 # We only ever need to cache the dir stat. |
66 self._stat_cache.Set(dir_path, dir_stat) | 86 self._stat_cache.Set(dir_path, dir_stat) |
67 return make_stat_info(dir_stat) | 87 return make_stat_info(dir_stat) |
68 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) | 88 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) |
69 | 89 |
70 @memoize | 90 @memoize |
71 def _MemoizedStatAsyncFromFileSystem(self, dir_path): | 91 def _MemoizedStatAsyncFromFileSystem(self, dir_path): |
72 '''This is a simple wrapper to memoize Futures to directory stats, since | 92 '''This is a simple wrapper to memoize Futures to directory stats, since |
(...skipping 89 matching lines...) | |
162 for f in self.ReadSingle(root).Get(): | 182 for f in self.ReadSingle(root).Get(): |
163 if IsDirectory(f): | 183 if IsDirectory(f): |
164 dirs.append(f) | 184 dirs.append(f) |
165 else: | 185 else: |
166 files.append(f) | 186 files.append(f) |
167 # Update the cache. This is a root -> (dirs, files, version) mapping. | 187 # Update the cache. This is a root -> (dirs, files, version) mapping. |
168 self._walk_cache.Set(root, (dirs, files, root_stat.version)) | 188 self._walk_cache.Set(root, (dirs, files, root_stat.version)) |
169 return dirs, files | 189 return dirs, files |
170 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) | 190 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) |
171 | 191 |
172 def GetCommitID(self): | |
173 return self._file_system.GetCommitID() | |
174 | |
175 def GetPreviousCommitID(self): | |
176 return self._file_system.GetPreviousCommitID() | |
177 | |
178 def GetIdentity(self): | 192 def GetIdentity(self): |
179 return self._file_system.GetIdentity() | 193 return self._file_system.GetIdentity() |
180 | 194 |
195 def GetVersion(self): | |
196 return self._file_system.GetVersion() | |
197 | |
181 def __repr__(self): | 198 def __repr__(self): |
182 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) | 199 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) |
OLD | NEW |