Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(143)

Side by Side Diff: chrome/common/extensions/docs/server2/caching_file_system.py

Issue 512453002: Docserver: Add more skip_not_found support and cache "not found"s (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import posixpath 5 import posixpath
6 import sys 6 import sys
7 7
8 from file_system import FileSystem, StatInfo, FileNotFoundError 8 from file_system import FileSystem, StatInfo, FileNotFoundError
9 from future import Future 9 from future import Future
10 from path_util import IsDirectory, ToDirectory 10 from path_util import IsDirectory, ToDirectory
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
50 return dir_stat 50 return dir_stat
51 # Was a file stat. Extract that file. 51 # Was a file stat. Extract that file.
52 file_version = dir_stat.child_versions.get(file_path) 52 file_version = dir_stat.child_versions.get(file_path)
53 if file_version is None: 53 if file_version is None:
54 raise FileNotFoundError('No stat found for %s in %s (found %s)' % 54 raise FileNotFoundError('No stat found for %s in %s (found %s)' %
55 (path, dir_path, dir_stat.child_versions)) 55 (path, dir_path, dir_stat.child_versions))
56 return StatInfo(file_version) 56 return StatInfo(file_version)
57 57
58 dir_stat = self._stat_object_store.Get(dir_path).Get() 58 dir_stat = self._stat_object_store.Get(dir_path).Get()
59 if dir_stat is not None: 59 if dir_stat is not None:
60 return Future(value=make_stat_info(dir_stat)) 60 return Future(callback=lambda: make_stat_info(dir_stat))
61 61
62 def next(dir_stat): 62 def next(dir_stat):
63 assert dir_stat is not None # should have raised a FileNotFoundError 63 assert dir_stat is not None # should have raised a FileNotFoundError
64 # We only ever need to cache the dir stat. 64 # We only ever need to cache the dir stat.
65 self._stat_object_store.Set(dir_path, dir_stat) 65 self._stat_object_store.Set(dir_path, dir_stat)
66 return make_stat_info(dir_stat) 66 return make_stat_info(dir_stat)
67 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) 67 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next)
68 68
69 @memoize 69 @memoize
70 def _MemoizedStatAsyncFromFileSystem(self, dir_path): 70 def _MemoizedStatAsyncFromFileSystem(self, dir_path):
71 '''This is a simple wrapper to memoize Futures to directory stats, since 71 '''This is a simple wrapper to memoize Futures to directory stats, since
72 StatAsync makes heavy use of it. Only cache directories so that the 72 StatAsync makes heavy use of it. Only cache directories so that the
73 memoized cache doesn't blow up. 73 memoized cache doesn't blow up.
74 ''' 74 '''
75 assert IsDirectory(dir_path) 75 assert IsDirectory(dir_path)
76 return self._file_system.StatAsync(dir_path) 76 return self._file_system.StatAsync(dir_path)
77 77
78 def Read(self, paths, skip_not_found=False): 78 def Read(self, paths, skip_not_found=False):
79 '''Reads a list of files. If a file is in memcache and it is not out of 79 '''Reads a list of files. If a file is in memcache and it is not out of
not at google - send to devlin 2014/08/26 20:59:34 Could you change "memcache" to just "cached"?
80 date, it is returned. Otherwise, the file is retrieved from the file system. 80 date, it is returned. Otherwise, the file is retrieved from the file system.
81 ''' 81 '''
82 cached_read_values = self._read_object_store.GetMulti(paths).Get() 82 cached_read_values = self._read_object_store.GetMulti(paths).Get()
83 cached_stat_values = self._stat_object_store.GetMulti(paths).Get() 83 cached_stat_values = self._stat_object_store.GetMulti(paths).Get()
84 84
85 # Populate a map of paths to Futures to their stat. They may have already 85 # Populate a map of paths to Futures to their stat. They may have already
86 # been cached in which case their Future will already have been constructed 86 # been cached in which case their Future will already have been constructed
87 # with a value. 87 # with a value.
88 stat_futures = {} 88 stat_futures = {}
89 89
90 def handle(error): 90 def handle(error):
91 if isinstance(error, FileNotFoundError): 91 if isinstance(error, FileNotFoundError):
92 return None 92 return None
93 raise error 93 raise error
94 94
95 for path in paths: 95 for path in paths:
96 stat_value = cached_stat_values.get(path) 96 stat_value = cached_stat_values.get(path)
97 if stat_value is None: 97 if stat_value is None:
98 stat_future = self.StatAsync(path) 98 stat_future = self.StatAsync(path)
99 if skip_not_found: 99 if skip_not_found:
100 stat_future = stat_future.Then(lambda x: x, handle) 100 stat_future = stat_future.Then(lambda x: x, handle)
101 else: 101 else:
102 stat_future = Future(value=stat_value) 102 stat_future = Future(value=stat_value)
103 stat_futures[path] = stat_future 103 stat_futures[path] = stat_future
104 104
105 # Filter only the cached data which is fresh by comparing to the latest 105 # Filter only the cached data which is fresh by comparing to the latest
106 # stat. The cached read data includes the cached version. Remove it for 106 # stat. The cached read data includes the cached version. Remove it for
107 # the result returned to callers. 107 # the result returned to callers.
108 fresh_data = dict( 108 fresh_data = dict(
not at google - send to devlin 2014/08/26 20:59:34 This "fresh" thing is tripping me up. Maybe it sho…
109 (path, data) for path, (data, version) in cached_read_values.iteritems() 109 (path, data) for path, (data, version) in cached_read_values.iteritems()
110 if stat_futures[path].Get().version == version) 110 if version and stat_futures[path].Get().version == version)
not at google - send to devlin 2014/08/26 20:59:34 I'm paranoid about version being 0 here. Could you…
111
112 if skip_not_found:
113 # Remove paths for files that don't exist so reads aren't attempted.
not at google - send to devlin 2014/08/26 20:59:34 Could you mention why this works? Namely that this…
114 paths = [path for path in paths
115 if cached_read_values.get(path, (None, True))[1]]
not at google - send to devlin 2014/08/26 20:59:34 Could you use 'path in cached_read_values'? This …
ahernandez 2014/08/27 00:07:22 The reason I have the ugly .get() call is because…
not at google - send to devlin 2014/08/27 01:23:55 Ah I see. So to be clear: If the item isn't cached…
111 116
112 if len(fresh_data) == len(paths): 117 if len(fresh_data) == len(paths):
113 # Everything was cached and up-to-date. 118 # Everything was cached and up-to-date.
114 return Future(value=fresh_data) 119 return Future(value=fresh_data)
115 120
116 def next(new_results): 121 def next(new_results):
117 # Update the cache. This is a path -> (data, version) mapping. 122 # Update the cache. This is a path -> (data, version) mapping.
118 self._read_object_store.SetMulti( 123 self._read_object_store.SetMulti(
119 dict((path, (new_result, stat_futures[path].Get().version)) 124 dict((path, (new_result, stat_futures[path].Get().version))
120 for path, new_result in new_results.iteritems())) 125 for path, new_result in new_results.iteritems()))
126 # Update the read cache to include files that weren't found, to prevent
127 # constantly trying to read a file we now know doesn't exist.
not at google - send to devlin 2014/08/26 20:59:34 Relating to comments above - I'm not sure we need…
ahernandez 2014/08/27 00:07:22 I think the stat cache doesn't know about non-exis…
not at google - send to devlin 2014/08/27 01:23:55 Ohh I see. Ok. What you have is fine. We should a…
ahernandez 2014/08/27 17:33:28 I don't think we should be afraid of comments :)
128 self._read_object_store.SetMulti(
129 dict((path, (None, None)) for path in paths
130 if stat_futures[path].Get() is None))
121 new_results.update(fresh_data) 131 new_results.update(fresh_data)
122 return new_results 132 return new_results
123 # Read in the values that were uncached or old. 133 # Read in the values that were uncached or old.
124 return self._file_system.Read(set(paths) - set(fresh_data.iterkeys()), 134 return self._file_system.Read(set(paths) - set(fresh_data.iterkeys()),
125 skip_not_found=skip_not_found).Then(next) 135 skip_not_found=skip_not_found).Then(next)
126 136
127 def GetIdentity(self): 137 def GetIdentity(self):
128 return self._file_system.GetIdentity() 138 return self._file_system.GetIdentity()
129 139
130 def __repr__(self): 140 def __repr__(self):
131 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) 141 return '%s of <%s>' % (type(self).__name__, repr(self._file_system))
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698