Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(77)

Side by Side Diff: chrome/common/extensions/docs/server2/caching_file_system.py

Issue 521453003: Docserver: Override Walk in CachingFileSystem (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import posixpath 5 import posixpath
6 import sys 6 import sys
7 7
8 from file_system import FileSystem, StatInfo, FileNotFoundError 8 from file_system import FileSystem, StatInfo, FileNotFoundError
9 from future import Future 9 from future import All, Future
10 from path_util import IsDirectory, ToDirectory 10 from path_util import AssertIsDirectory, IsDirectory, ToDirectory
11 from third_party.json_schema_compiler.memoize import memoize 11 from third_party.json_schema_compiler.memoize import memoize
12 12
13 13
14 class CachingFileSystem(FileSystem): 14 class CachingFileSystem(FileSystem):
15 '''FileSystem which implements a caching layer on top of |file_system|. It's 15 '''FileSystem which implements a caching layer on top of |file_system|. It's
16 smart, using Stat() to decide whether to skip Read()ing from |file_system|, 16 smart, using Stat() to decide whether to skip Read()ing from |file_system|,
17 and only Stat()ing directories, never files. 17 and only Stat()ing directories, never files.
18 ''' 18 '''
19 def __init__(self, file_system, object_store_creator): 19 def __init__(self, file_system, object_store_creator):
20 self._file_system = file_system 20 self._file_system = file_system
21 def create_object_store(category, **optargs): 21 def create_object_store(category, **optargs):
22 return object_store_creator.Create( 22 return object_store_creator.Create(
23 CachingFileSystem, 23 CachingFileSystem,
24 category='%s/%s' % (file_system.GetIdentity(), category), 24 category='%s/%s' % (file_system.GetIdentity(), category),
25 **optargs) 25 **optargs)
26 self._stat_object_store = create_object_store('stat') 26 self._stat_cache = create_object_store('stat')
27 # The read caches can start populated (start_empty=False) because file 27 # The read caches can start populated (start_empty=False) because file
28 # updates are picked up by the stat, so it doesn't need the force-refresh 28 # updates are picked up by the stat, so it doesn't need the force-refresh
29 # which starting empty is designed for. Without this optimisation, cron 29 # which starting empty is designed for. Without this optimisation, cron
30 # runs are extra slow. 30 # runs are extra slow.
31 self._read_object_store = create_object_store('read', start_empty=False) 31 self._read_cache = create_object_store('read', start_empty=False)
32 self._walk_cache = create_object_store('walk', start_empty=False)
32 33
33 def Refresh(self): 34 def Refresh(self):
34 return self._file_system.Refresh() 35 return self._file_system.Refresh()
35 36
36 def StatAsync(self, path): 37 def StatAsync(self, path):
37 '''Stats the directory given, or if a file is given, stats the file's parent 38 '''Stats the directory given, or if a file is given, stats the file's parent
38 directory to get info about the file. 39 directory to get info about the file.
39 ''' 40 '''
40 # Always stat the parent directory, since it will have the stat of the child 41 # Always stat the parent directory, since it will have the stat of the child
41 # anyway, and this gives us an entire directory's stat info at once. 42 # anyway, and this gives us an entire directory's stat info at once.
42 dir_path, file_path = posixpath.split(path) 43 dir_path, file_path = posixpath.split(path)
43 dir_path = ToDirectory(dir_path) 44 dir_path = ToDirectory(dir_path)
44 45
45 def make_stat_info(dir_stat): 46 def make_stat_info(dir_stat):
46 '''Converts a dir stat into the correct resulting StatInfo; if the Stat 47 '''Converts a dir stat into the correct resulting StatInfo; if the Stat
47 was for a file, the StatInfo should just contain that file. 48 was for a file, the StatInfo should just contain that file.
48 ''' 49 '''
49 if path == dir_path: 50 if path == dir_path:
50 return dir_stat 51 return dir_stat
51 # Was a file stat. Extract that file. 52 # Was a file stat. Extract that file.
52 file_version = dir_stat.child_versions.get(file_path) 53 file_version = dir_stat.child_versions.get(file_path)
53 if file_version is None: 54 if file_version is None:
54 raise FileNotFoundError('No stat found for %s in %s (found %s)' % 55 raise FileNotFoundError('No stat found for %s in %s (found %s)' %
55 (path, dir_path, dir_stat.child_versions)) 56 (path, dir_path, dir_stat.child_versions))
56 return StatInfo(file_version) 57 return StatInfo(file_version)
57 58
58 dir_stat = self._stat_object_store.Get(dir_path).Get() 59 dir_stat = self._stat_cache.Get(dir_path).Get()
59 if dir_stat is not None: 60 if dir_stat is not None:
60 return Future(callback=lambda: make_stat_info(dir_stat)) 61 return Future(callback=lambda: make_stat_info(dir_stat))
61 62
62 def next(dir_stat): 63 def next(dir_stat):
63 assert dir_stat is not None # should have raised a FileNotFoundError 64 assert dir_stat is not None # should have raised a FileNotFoundError
64 # We only ever need to cache the dir stat. 65 # We only ever need to cache the dir stat.
65 self._stat_object_store.Set(dir_path, dir_stat) 66 self._stat_cache.Set(dir_path, dir_stat)
66 return make_stat_info(dir_stat) 67 return make_stat_info(dir_stat)
67 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) 68 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next)
68 69
69 @memoize 70 @memoize
70 def _MemoizedStatAsyncFromFileSystem(self, dir_path): 71 def _MemoizedStatAsyncFromFileSystem(self, dir_path):
71 '''This is a simple wrapper to memoize Futures to directory stats, since 72 '''This is a simple wrapper to memoize Futures to directory stats, since
72 StatAsync makes heavy use of it. Only cache directories so that the 73 StatAsync makes heavy use of it. Only cache directories so that the
73 memoized cache doesn't blow up. 74 memoized cache doesn't blow up.
74 ''' 75 '''
75 assert IsDirectory(dir_path) 76 assert IsDirectory(dir_path)
76 return self._file_system.StatAsync(dir_path) 77 return self._file_system.StatAsync(dir_path)
77 78
78 def Read(self, paths, skip_not_found=False): 79 def Read(self, paths, skip_not_found=False):
79 '''Reads a list of files. If a file is cached and it is not out of 80 '''Reads a list of files. If a file is cached and it is not out of
80 date, it is returned. Otherwise, the file is retrieved from the file system. 81 date, it is returned. Otherwise, the file is retrieved from the file system.
81 ''' 82 '''
82 # Files which aren't found are cached in the read object store as 83 # Files which aren't found are cached in the read object store as
83 # (path, None, None). This is to prevent re-reads of files we know 84 # (path, None, None). This is to prevent re-reads of files we know
84 # do not exist. 85 # do not exist.
85 cached_read_values = self._read_object_store.GetMulti(paths).Get() 86 cached_read_values = self._read_cache.GetMulti(paths).Get()
86 cached_stat_values = self._stat_object_store.GetMulti(paths).Get() 87 cached_stat_values = self._stat_cache.GetMulti(paths).Get()
87 88
88 # Populate a map of paths to Futures to their stat. They may have already 89 # Populate a map of paths to Futures to their stat. They may have already
89 # been cached in which case their Future will already have been constructed 90 # been cached in which case their Future will already have been constructed
90 # with a value. 91 # with a value.
91 stat_futures = {} 92 stat_futures = {}
92 93
93 def handle(error): 94 def handle(error):
94 if isinstance(error, FileNotFoundError): 95 if isinstance(error, FileNotFoundError):
95 return None 96 return None
96 raise error 97 raise error
(...skipping 22 matching lines...) Expand all
119 # See the above declaration of |cached_read_values| for more information. 120 # See the above declaration of |cached_read_values| for more information.
120 paths = [path for path in paths 121 paths = [path for path in paths
121 if cached_read_values.get(path, (None, True))[1]] 122 if cached_read_values.get(path, (None, True))[1]]
122 123
123 if len(up_to_date_data) == len(paths): 124 if len(up_to_date_data) == len(paths):
124 # Everything was cached and up-to-date. 125 # Everything was cached and up-to-date.
125 return Future(value=up_to_date_data) 126 return Future(value=up_to_date_data)
126 127
127 def next(new_results): 128 def next(new_results):
128 # Update the cache. This is a path -> (data, version) mapping. 129 # Update the cache. This is a path -> (data, version) mapping.
129 self._read_object_store.SetMulti( 130 self._read_cache.SetMulti(
130 dict((path, (new_result, stat_futures[path].Get().version)) 131 dict((path, (new_result, stat_futures[path].Get().version))
131 for path, new_result in new_results.iteritems())) 132 for path, new_result in new_results.iteritems()))
132 # Update the read cache to include files that weren't found, to prevent 133 # Update the read cache to include files that weren't found, to prevent
133 # constantly trying to read a file we now know doesn't exist. 134 # constantly trying to read a file we now know doesn't exist.
134 self._read_object_store.SetMulti( 135 self._read_cache.SetMulti(
135 dict((path, (None, None)) for path in paths 136 dict((path, (None, None)) for path in paths
136 if stat_futures[path].Get() is None)) 137 if stat_futures[path].Get() is None))
137 new_results.update(up_to_date_data) 138 new_results.update(up_to_date_data)
138 return new_results 139 return new_results
139 # Read in the values that were uncached or old. 140 # Read in the values that were uncached or old.
140 return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()), 141 return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()),
141 skip_not_found=skip_not_found).Then(next) 142 skip_not_found=skip_not_found).Then(next)
142 143
144 def Walk(self, root, depth=-1):
145 '''Overrides FileSystem.Walk() to provide caching functionality.
146 '''
147 def delegate(root):
not at google - send to devlin 2014/08/29 20:26:07 file_lister?
148 res, root_stat = All((self._walk_cache.Get(root),
149 self.StatAsync(root))).Get()
150
151 if res and res[2] == root_stat.version:
ahernandez 2014/08/29 20:13:41 Sorry I keep flipping around this if condition, I'
not at google - send to devlin 2014/08/29 20:26:07 This looks good.
152 dirs, files = res[0], res[1]
153 else:
154 # Wasn't cached, or not up to date.
155 dirs, files = [], []
156 for f in self.ReadSingle(root).Get():
157 if IsDirectory(f):
158 dirs.append(f)
159 else:
160 files.append(f)
161 # Update the cache. This is a root -> (dirs, files, version) mapping.
162 self._walk_cache.Set(root, (dirs, files, root_stat.version))
163 return dirs, files
164 return self._file_system.Walk(root, depth=depth, file_lister=delegate)
165
143 def GetIdentity(self): 166 def GetIdentity(self):
144 return self._file_system.GetIdentity() 167 return self._file_system.GetIdentity()
145 168
146 def __repr__(self): 169 def __repr__(self):
147 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) 170 return '%s of <%s>' % (type(self).__name__, repr(self._file_system))
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698