Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(551)

Side by Side Diff: chrome/common/extensions/docs/server2/caching_file_system.py

Issue 660383002: Docserver: Persist stat cache for versioned file systems (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: change caching strategy, better refresh cycle synchronization Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import posixpath 5 import posixpath
6 import sys 6 import sys
7 7
8 from file_system import FileSystem, StatInfo, FileNotFoundError 8 from file_system import FileSystem, StatInfo, FileNotFoundError
9 from future import All, Future 9 from future import All, Future
10 from path_util import AssertIsDirectory, IsDirectory, ToDirectory 10 from path_util import AssertIsDirectory, IsDirectory, ToDirectory
11 from third_party.json_schema_compiler.memoize import memoize 11 from third_party.json_schema_compiler.memoize import memoize
12 12
13 13
14
15 class CacheMissError(Exception):
16 '''Raised when data is not found in a CachingFileSystem which is not allowed
17 to reference its backing FileSystem.'''
18 def __init__(self, message):
19 Exception.__init__(self, message)
20
21
22
14 class CachingFileSystem(FileSystem): 23 class CachingFileSystem(FileSystem):
15 '''FileSystem which implements a caching layer on top of |file_system|. It's 24 '''FileSystem which implements a caching layer on top of |file_system|. If
16 smart, using Stat() to decide whether to skip Read()ing from |file_system|, 25 |fail_on_miss| is True then cache misses throw a CacheMissError rather than
17 and only Stat()ing directories never files. 26 falling back onto the underlying FileSystem.
27
28 If |empty_stat_cache| is True (default), its stat cache is initialized empty.
29 This should be set to False when wrapping a FileSystem that makes a proper
30 distinction between stable identity and unstable identity.
31
32 The working assumption is that a file system's unstable identity changes any
33 time any contents of the file system change, and therefore a stat cache keyed
34 on unstable identity will never need to be refreshed.
18 ''' 35 '''
19 def __init__(self, file_system, object_store_creator): 36 def __init__(self,
37 file_system,
38 object_store_creator,
39 fail_on_miss=False,
40 empty_stat_cache=True):
20 self._file_system = file_system 41 self._file_system = file_system
21 def create_object_store(category, **optargs): 42 self._fail_on_miss = fail_on_miss
43 def create_object_store(category, use_stable_identity=True, **optargs):
44 if use_stable_identity:
45 identity = file_system.GetStableIdentity()
46 else:
47 identity = file_system.GetUnstableIdentity()
22 return object_store_creator.Create( 48 return object_store_creator.Create(
23 CachingFileSystem, 49 CachingFileSystem,
24 category='%s/%s' % (file_system.GetIdentity(), category), 50 category='%s/%s' % (identity, category),
25 **optargs) 51 **optargs)
26 self._stat_cache = create_object_store('stat') 52 # The stable stat cache caches file stat info keyed by the file system's
53 # stable identity (or unstable identity for persistent caches).
54 self._stat_cache = create_object_store('stat',
55 use_stable_identity=empty_stat_cache,
56 start_empty=empty_stat_cache)
27 # The read caches can start populated (start_empty=False) because file 57 # The read caches can start populated (start_empty=False) because file
28 # updates are picked up by the stat, so it doesn't need the force-refresh 58 # updates are picked up by the stat, so it doesn't need the force-refresh
29 # which starting empty is designed for. Without this optimisation, cron 59 # which starting empty is designed for. Without this optimisation, cron
30 # runs are extra slow. 60 # runs are extra slow.
31 self._read_cache = create_object_store('read', start_empty=False) 61 self._read_cache = create_object_store('read', start_empty=False)
32 self._walk_cache = create_object_store('walk', start_empty=False) 62 self._walk_cache = create_object_store('walk', start_empty=False)
33 63
34 def Refresh(self): 64 def Refresh(self):
35 return self._file_system.Refresh() 65 return self._file_system.Refresh()
36 66
(...skipping 12 matching lines...) Expand all
49 ''' 79 '''
50 if path == dir_path: 80 if path == dir_path:
51 return dir_stat 81 return dir_stat
52 # Was a file stat. Extract that file. 82 # Was a file stat. Extract that file.
53 file_version = dir_stat.child_versions.get(file_path) 83 file_version = dir_stat.child_versions.get(file_path)
54 if file_version is None: 84 if file_version is None:
55 raise FileNotFoundError('No stat found for %s in %s (found %s)' % 85 raise FileNotFoundError('No stat found for %s in %s (found %s)' %
56 (path, dir_path, dir_stat.child_versions)) 86 (path, dir_path, dir_stat.child_versions))
57 return StatInfo(file_version) 87 return StatInfo(file_version)
58 88
89 def raise_cache_miss(path):
90 raise FileNotFoundError('Got cache miss when trying to stat %s' % path)
91
59 dir_stat = self._stat_cache.Get(dir_path).Get() 92 dir_stat = self._stat_cache.Get(dir_path).Get()
60 if dir_stat is not None: 93 if dir_stat is not None:
61 return Future(callback=lambda: make_stat_info(dir_stat)) 94 return Future(callback=lambda: make_stat_info(dir_stat))
62 95
96 if self._fail_on_miss:
97 logging.info('Bailing on stat cache miss for %s' % dir_path)
98 return Future(callback=lambda: raise_cache_miss(dir_path))
99
63 def next(dir_stat): 100 def next(dir_stat):
64 assert dir_stat is not None # should have raised a FileNotFoundError 101 assert dir_stat is not None # should have raised a FileNotFoundError
65 # We only ever need to cache the dir stat. 102 # We only ever need to cache the dir stat.
66 self._stat_cache.Set(dir_path, dir_stat) 103 self._stat_cache.Set(dir_path, dir_stat)
67 return make_stat_info(dir_stat) 104 return make_stat_info(dir_stat)
68 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) 105 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next)
69 106
70 @memoize 107 @memoize
71 def _MemoizedStatAsyncFromFileSystem(self, dir_path): 108 def _MemoizedStatAsyncFromFileSystem(self, dir_path):
72 '''This is a simple wrapper to memoize Futures to directory stats, since 109 '''This is a simple wrapper to memoize Futures to directory stats, since
73 StatAsync makes heavy use of it. Only cache directories so that the 110 StatAsync makes heavy use of it. Only cache directories so that the
74 memoized cache doesn't blow up. 111 memoized cache doesn't blow up.
75 ''' 112 '''
76 assert IsDirectory(dir_path) 113 assert IsDirectory(dir_path)
77 return self._file_system.StatAsync(dir_path) 114 return self._file_system.StatAsync(dir_path)
78 115
79 def Read(self, paths, skip_not_found=False): 116 def Read(self, paths, skip_not_found=False):
80 '''Reads a list of files. If a file is cached and it is not out of 117 '''Reads a list of files. If a file is cached and it is not out of
81 date, it is returned. Otherwise, the file is retrieved from the file system. 118 date, it is returned. Otherwise, the file is retrieved from the file system.
82 ''' 119 '''
83 # Files which aren't found are cached in the read object store as 120 # Files which aren't found are cached in the read object store as
84 # (path, None, None). This is to prevent re-reads of files we know 121 # (path, None, None). This is to prevent re-reads of files we know
85 # do not exist. 122 # do not exist.
86 cached_read_values = self._read_cache.GetMulti(paths).Get() 123 cached_read_values = self._read_cache.GetMulti(paths).Get()
87 cached_stat_values = self._stat_cache.GetMulti(paths).Get() 124 cached_stat_info = self._stat_cache.GetMulti(paths).Get()
88 125
89 # Populate a map of paths to Futures to their stat. They may have already 126 # Populate a map of paths to Futures to their stat. They may have already
90 # been cached in which case their Future will already have been constructed 127 # been cached in which case their Future will already have been constructed
91 # with a value. 128 # with a value.
92 stat_futures = {} 129 stat_futures = {}
93 130
94 def handle(error): 131 def handle(error):
95 if isinstance(error, FileNotFoundError): 132 if isinstance(error, FileNotFoundError):
96 return None 133 return None
97 raise error 134 raise error
98 135
99 for path in paths: 136 for path in paths:
100 stat_value = cached_stat_values.get(path) 137 stat_info = cached_stat_info.get(path)
101 if stat_value is None: 138 if stat_info is None:
102 stat_future = self.StatAsync(path) 139 stat_future = self.StatAsync(path)
103 if skip_not_found: 140 if skip_not_found:
104 stat_future = stat_future.Then(lambda x: x, handle) 141 stat_future = stat_future.Then(lambda x: x, handle)
105 else: 142 else:
106 stat_future = Future(value=stat_value) 143 stat_future = Future(value=stat_info)
107 stat_futures[path] = stat_future 144 stat_futures[path] = stat_future
108 145
109 # Filter only the cached data which is up to date by comparing to the latest 146 # Filter only the cached data which is up to date by comparing to the latest
110 # stat. The cached read data includes the cached version. Remove it for 147 # stat. The cached read data includes the cached version. Remove it for
111 # the result returned to callers. |version| == None implies a non-existent 148 # the result returned to callers. |version| == None implies a non-existent
112 # file, so skip it. 149 # file, so skip it.
113 up_to_date_data = dict( 150 up_to_date_data = dict(
114 (path, data) for path, (data, version) in cached_read_values.iteritems() 151 (path, data) for path, (data, version) in cached_read_values.iteritems()
115 if version is not None and stat_futures[path].Get().version == version) 152 if version is not None and stat_futures[path].Get().version == version)
116 153
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
162 for f in self.ReadSingle(root).Get(): 199 for f in self.ReadSingle(root).Get():
163 if IsDirectory(f): 200 if IsDirectory(f):
164 dirs.append(f) 201 dirs.append(f)
165 else: 202 else:
166 files.append(f) 203 files.append(f)
167 # Update the cache. This is a root -> (dirs, files, version) mapping. 204 # Update the cache. This is a root -> (dirs, files, version) mapping.
168 self._walk_cache.Set(root, (dirs, files, root_stat.version)) 205 self._walk_cache.Set(root, (dirs, files, root_stat.version))
169 return dirs, files 206 return dirs, files
170 return self._file_system.Walk(root, depth=depth, file_lister=file_lister) 207 return self._file_system.Walk(root, depth=depth, file_lister=file_lister)
171 208
172 def GetCommitID(self):
173 return self._file_system.GetCommitID()
174
175 def GetPreviousCommitID(self):
176 return self._file_system.GetPreviousCommitID()
177
178 def GetIdentity(self): 209 def GetIdentity(self):
179 return self._file_system.GetIdentity() 210 return self._file_system.GetIdentity()
180 211
181 def __repr__(self): 212 def __repr__(self):
182 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) 213 return '%s of <%s>' % (type(self).__name__, repr(self._file_system))
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698