Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import posixpath | 5 import posixpath |
| 6 import sys | 6 import sys |
| 7 | 7 |
| 8 from file_system import FileSystem, StatInfo, FileNotFoundError | 8 from file_system import FileSystem, StatInfo, FileNotFoundError |
| 9 from future import Future | 9 from future import All, Future |
| 10 from path_util import IsDirectory, ToDirectory | 10 from path_util import AssertIsDirectory, IsDirectory, ToDirectory |
| 11 from third_party.json_schema_compiler.memoize import memoize | 11 from third_party.json_schema_compiler.memoize import memoize |
| 12 | 12 |
| 13 | 13 |
| 14 class CachingFileSystem(FileSystem): | 14 class CachingFileSystem(FileSystem): |
| 15 '''FileSystem which implements a caching layer on top of |file_system|. It's | 15 '''FileSystem which implements a caching layer on top of |file_system|. It's |
| 16 smart, using Stat() to decide whether to skip Read()ing from |file_system|, | 16 smart, using Stat() to decide whether to skip Read()ing from |file_system|, |
| 17 and only Stat()ing directories, never files. | 17 and only Stat()ing directories, never files. |
| 18 ''' | 18 ''' |
| 19 def __init__(self, file_system, object_store_creator): | 19 def __init__(self, file_system, object_store_creator): |
| 20 self._file_system = file_system | 20 self._file_system = file_system |
| 21 def create_object_store(category, **optargs): | 21 def create_object_store(category, **optargs): |
| 22 return object_store_creator.Create( | 22 return object_store_creator.Create( |
| 23 CachingFileSystem, | 23 CachingFileSystem, |
| 24 category='%s/%s' % (file_system.GetIdentity(), category), | 24 category='%s/%s' % (file_system.GetIdentity(), category), |
| 25 **optargs) | 25 **optargs) |
| 26 self._stat_object_store = create_object_store('stat') | 26 self._stat_cache = create_object_store('stat') |
| 27 # The read caches can start populated (start_empty=False) because file | 27 # The read caches can start populated (start_empty=False) because file |
| 28 # updates are picked up by the stat, so it doesn't need the force-refresh | 28 # updates are picked up by the stat, so it doesn't need the force-refresh |
| 29 # which starting empty is designed for. Without this optimisation, cron | 29 # which starting empty is designed for. Without this optimisation, cron |
| 30 # runs are extra slow. | 30 # runs are extra slow. |
| 31 self._read_object_store = create_object_store('read', start_empty=False) | 31 self._read_cache = create_object_store('read', start_empty=False) |
| 32 self._walk_cache = create_object_store('walk', start_empty=False) | |
| 32 | 33 |
| 33 def Refresh(self): | 34 def Refresh(self): |
| 34 return self._file_system.Refresh() | 35 return self._file_system.Refresh() |
| 35 | 36 |
| 36 def StatAsync(self, path): | 37 def StatAsync(self, path): |
| 37 '''Stats the directory given, or if a file is given, stats the file's parent | 38 '''Stats the directory given, or if a file is given, stats the file's parent |
| 38 directory to get info about the file. | 39 directory to get info about the file. |
| 39 ''' | 40 ''' |
| 40 # Always stat the parent directory, since it will have the stat of the child | 41 # Always stat the parent directory, since it will have the stat of the child |
| 41 # anyway, and this gives us an entire directory's stat info at once. | 42 # anyway, and this gives us an entire directory's stat info at once. |
| 42 dir_path, file_path = posixpath.split(path) | 43 dir_path, file_path = posixpath.split(path) |
| 43 dir_path = ToDirectory(dir_path) | 44 dir_path = ToDirectory(dir_path) |
| 44 | 45 |
| 45 def make_stat_info(dir_stat): | 46 def make_stat_info(dir_stat): |
| 46 '''Converts a dir stat into the correct resulting StatInfo; if the Stat | 47 '''Converts a dir stat into the correct resulting StatInfo; if the Stat |
| 47 was for a file, the StatInfo should just contain that file. | 48 was for a file, the StatInfo should just contain that file. |
| 48 ''' | 49 ''' |
| 49 if path == dir_path: | 50 if path == dir_path: |
| 50 return dir_stat | 51 return dir_stat |
| 51 # Was a file stat. Extract that file. | 52 # Was a file stat. Extract that file. |
| 52 file_version = dir_stat.child_versions.get(file_path) | 53 file_version = dir_stat.child_versions.get(file_path) |
| 53 if file_version is None: | 54 if file_version is None: |
| 54 raise FileNotFoundError('No stat found for %s in %s (found %s)' % | 55 raise FileNotFoundError('No stat found for %s in %s (found %s)' % |
| 55 (path, dir_path, dir_stat.child_versions)) | 56 (path, dir_path, dir_stat.child_versions)) |
| 56 return StatInfo(file_version) | 57 return StatInfo(file_version) |
| 57 | 58 |
| 58 dir_stat = self._stat_object_store.Get(dir_path).Get() | 59 dir_stat = self._stat_cache.Get(dir_path).Get() |
| 59 if dir_stat is not None: | 60 if dir_stat is not None: |
| 60 return Future(callback=lambda: make_stat_info(dir_stat)) | 61 return Future(callback=lambda: make_stat_info(dir_stat)) |
| 61 | 62 |
| 62 def next(dir_stat): | 63 def next(dir_stat): |
| 63 assert dir_stat is not None # should have raised a FileNotFoundError | 64 assert dir_stat is not None # should have raised a FileNotFoundError |
| 64 # We only ever need to cache the dir stat. | 65 # We only ever need to cache the dir stat. |
| 65 self._stat_object_store.Set(dir_path, dir_stat) | 66 self._stat_cache.Set(dir_path, dir_stat) |
| 66 return make_stat_info(dir_stat) | 67 return make_stat_info(dir_stat) |
| 67 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) | 68 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) |
| 68 | 69 |
| 69 @memoize | 70 @memoize |
| 70 def _MemoizedStatAsyncFromFileSystem(self, dir_path): | 71 def _MemoizedStatAsyncFromFileSystem(self, dir_path): |
| 71 '''This is a simple wrapper to memoize Futures to directory stats, since | 72 '''This is a simple wrapper to memoize Futures to directory stats, since |
| 72 StatAsync makes heavy use of it. Only cache directories so that the | 73 StatAsync makes heavy use of it. Only cache directories so that the |
| 73 memoized cache doesn't blow up. | 74 memoized cache doesn't blow up. |
| 74 ''' | 75 ''' |
| 75 assert IsDirectory(dir_path) | 76 assert IsDirectory(dir_path) |
| 76 return self._file_system.StatAsync(dir_path) | 77 return self._file_system.StatAsync(dir_path) |
| 77 | 78 |
| 78 def Read(self, paths, skip_not_found=False): | 79 def Read(self, paths, skip_not_found=False): |
| 79 '''Reads a list of files. If a file is cached and it is not out of | 80 '''Reads a list of files. If a file is cached and it is not out of |
| 80 date, it is returned. Otherwise, the file is retrieved from the file system. | 81 date, it is returned. Otherwise, the file is retrieved from the file system. |
| 81 ''' | 82 ''' |
| 82 # Files which aren't found are cached in the read object store as | 83 # Files which aren't found are cached in the read object store as |
| 83 # (path, None, None). This is to prevent re-reads of files we know | 84 # (path, None, None). This is to prevent re-reads of files we know |
| 84 # do not exist. | 85 # do not exist. |
| 85 cached_read_values = self._read_object_store.GetMulti(paths).Get() | 86 cached_read_values = self._read_cache.GetMulti(paths).Get() |
| 86 cached_stat_values = self._stat_object_store.GetMulti(paths).Get() | 87 cached_stat_values = self._stat_cache.GetMulti(paths).Get() |
| 87 | 88 |
| 88 # Populate a map of paths to Futures to their stat. They may have already | 89 # Populate a map of paths to Futures to their stat. They may have already |
| 89 # been cached in which case their Future will already have been constructed | 90 # been cached in which case their Future will already have been constructed |
| 90 # with a value. | 91 # with a value. |
| 91 stat_futures = {} | 92 stat_futures = {} |
| 92 | 93 |
| 93 def handle(error): | 94 def handle(error): |
| 94 if isinstance(error, FileNotFoundError): | 95 if isinstance(error, FileNotFoundError): |
| 95 return None | 96 return None |
| 96 raise error | 97 raise error |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 119 # See the above declaration of |cached_read_values| for more information. | 120 # See the above declaration of |cached_read_values| for more information. |
| 120 paths = [path for path in paths | 121 paths = [path for path in paths |
| 121 if cached_read_values.get(path, (None, True))[1]] | 122 if cached_read_values.get(path, (None, True))[1]] |
| 122 | 123 |
| 123 if len(up_to_date_data) == len(paths): | 124 if len(up_to_date_data) == len(paths): |
| 124 # Everything was cached and up-to-date. | 125 # Everything was cached and up-to-date. |
| 125 return Future(value=up_to_date_data) | 126 return Future(value=up_to_date_data) |
| 126 | 127 |
| 127 def next(new_results): | 128 def next(new_results): |
| 128 # Update the cache. This is a path -> (data, version) mapping. | 129 # Update the cache. This is a path -> (data, version) mapping. |
| 129 self._read_object_store.SetMulti( | 130 self._read_cache.SetMulti( |
| 130 dict((path, (new_result, stat_futures[path].Get().version)) | 131 dict((path, (new_result, stat_futures[path].Get().version)) |
| 131 for path, new_result in new_results.iteritems())) | 132 for path, new_result in new_results.iteritems())) |
| 132 # Update the read cache to include files that weren't found, to prevent | 133 # Update the read cache to include files that weren't found, to prevent |
| 133 # constantly trying to read a file we now know doesn't exist. | 134 # constantly trying to read a file we now know doesn't exist. |
| 134 self._read_object_store.SetMulti( | 135 self._read_cache.SetMulti( |
| 135 dict((path, (None, None)) for path in paths | 136 dict((path, (None, None)) for path in paths |
| 136 if stat_futures[path].Get() is None)) | 137 if stat_futures[path].Get() is None)) |
| 137 new_results.update(up_to_date_data) | 138 new_results.update(up_to_date_data) |
| 138 return new_results | 139 return new_results |
| 139 # Read in the values that were uncached or old. | 140 # Read in the values that were uncached or old. |
| 140 return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()), | 141 return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()), |
| 141 skip_not_found=skip_not_found).Then(next) | 142 skip_not_found=skip_not_found).Then(next) |
| 142 | 143 |
| 144 def Walk(self, root, depth=-1): | |
| 145 '''Overrides FileSystem.Walk() to provide caching functionality. | |
| 146 ''' | |
| 147 def delegate(root): | |
|
not at google - send to devlin
2014/08/29 20:26:07
file_lister?
| |
| 148 res, root_stat = All((self._walk_cache.Get(root), | |
| 149 self.StatAsync(root))).Get() | |
| 150 | |
| 151 if res and res[2] == root_stat.version: | |
|
ahernandez
2014/08/29 20:13:41
Sorry I keep flipping around this if condition, I'
not at google - send to devlin
2014/08/29 20:26:07
This looks good.
| |
| 152 dirs, files = res[0], res[1] | |
| 153 else: | |
| 154 # Wasn't cached, or not up to date. | |
| 155 dirs, files = [], [] | |
| 156 for f in self.ReadSingle(root).Get(): | |
| 157 if IsDirectory(f): | |
| 158 dirs.append(f) | |
| 159 else: | |
| 160 files.append(f) | |
| 161 # Update the cache. This is a root -> (dirs, files, version) mapping. | |
| 162 self._walk_cache.Set(root, (dirs, files, root_stat.version)) | |
| 163 return dirs, files | |
| 164 return self._file_system.Walk(root, depth=depth, file_lister=delegate) | |
| 165 | |
| 143 def GetIdentity(self): | 166 def GetIdentity(self): |
| 144 return self._file_system.GetIdentity() | 167 return self._file_system.GetIdentity() |
| 145 | 168 |
| 146 def __repr__(self): | 169 def __repr__(self): |
| 147 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) | 170 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) |
| OLD | NEW |