OLD | NEW |
---|---|
1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import posixpath | 5 import posixpath |
6 import sys | 6 import sys |
7 | 7 |
8 from file_system import FileSystem, StatInfo, FileNotFoundError | 8 from file_system import FileSystem, StatInfo, FileNotFoundError |
9 from future import Future | 9 from future import Future |
10 from path_util import IsDirectory, ToDirectory | 10 from path_util import IsDirectory, ToDirectory |
(...skipping 39 matching lines...) | |
50 return dir_stat | 50 return dir_stat |
51 # Was a file stat. Extract that file. | 51 # Was a file stat. Extract that file. |
52 file_version = dir_stat.child_versions.get(file_path) | 52 file_version = dir_stat.child_versions.get(file_path) |
53 if file_version is None: | 53 if file_version is None: |
54 raise FileNotFoundError('No stat found for %s in %s (found %s)' % | 54 raise FileNotFoundError('No stat found for %s in %s (found %s)' % |
55 (path, dir_path, dir_stat.child_versions)) | 55 (path, dir_path, dir_stat.child_versions)) |
56 return StatInfo(file_version) | 56 return StatInfo(file_version) |
57 | 57 |
58 dir_stat = self._stat_object_store.Get(dir_path).Get() | 58 dir_stat = self._stat_object_store.Get(dir_path).Get() |
59 if dir_stat is not None: | 59 if dir_stat is not None: |
60 return Future(value=make_stat_info(dir_stat)) | 60 return Future(callback=lambda: make_stat_info(dir_stat)) |
61 | 61 |
62 def next(dir_stat): | 62 def next(dir_stat): |
63 assert dir_stat is not None # should have raised a FileNotFoundError | 63 assert dir_stat is not None # should have raised a FileNotFoundError |
64 # We only ever need to cache the dir stat. | 64 # We only ever need to cache the dir stat. |
65 self._stat_object_store.Set(dir_path, dir_stat) | 65 self._stat_object_store.Set(dir_path, dir_stat) |
66 return make_stat_info(dir_stat) | 66 return make_stat_info(dir_stat) |
67 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) | 67 return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next) |
68 | 68 |
69 @memoize | 69 @memoize |
70 def _MemoizedStatAsyncFromFileSystem(self, dir_path): | 70 def _MemoizedStatAsyncFromFileSystem(self, dir_path): |
71 '''This is a simple wrapper to memoize Futures to directory stats, since | 71 '''This is a simple wrapper to memoize Futures to directory stats, since |
72 StatAsync makes heavy use of it. Only cache directories so that the | 72 StatAsync makes heavy use of it. Only cache directories so that the |
73 memoized cache doesn't blow up. | 73 memoized cache doesn't blow up. |
74 ''' | 74 ''' |
75 assert IsDirectory(dir_path) | 75 assert IsDirectory(dir_path) |
76 return self._file_system.StatAsync(dir_path) | 76 return self._file_system.StatAsync(dir_path) |
77 | 77 |
78 def Read(self, paths, skip_not_found=False): | 78 def Read(self, paths, skip_not_found=False): |
79 '''Reads a list of files. If a file is in memcache and it is not out of | 79 '''Reads a list of files. If a file is in memcache and it is not out of |
not at google - send to devlin
2014/08/26 20:59:34
Could you change "memcache" to just "cached"?
| |
80 date, it is returned. Otherwise, the file is retrieved from the file system. | 80 date, it is returned. Otherwise, the file is retrieved from the file system. |
81 ''' | 81 ''' |
82 cached_read_values = self._read_object_store.GetMulti(paths).Get() | 82 cached_read_values = self._read_object_store.GetMulti(paths).Get() |
83 cached_stat_values = self._stat_object_store.GetMulti(paths).Get() | 83 cached_stat_values = self._stat_object_store.GetMulti(paths).Get() |
84 | 84 |
85 # Populate a map of paths to Futures to their stat. They may have already | 85 # Populate a map of paths to Futures to their stat. They may have already |
86 # been cached in which case their Future will already have been constructed | 86 # been cached in which case their Future will already have been constructed |
87 # with a value. | 87 # with a value. |
88 stat_futures = {} | 88 stat_futures = {} |
89 | 89 |
90 def handle(error): | 90 def handle(error): |
91 if isinstance(error, FileNotFoundError): | 91 if isinstance(error, FileNotFoundError): |
92 return None | 92 return None |
93 raise error | 93 raise error |
94 | 94 |
95 for path in paths: | 95 for path in paths: |
96 stat_value = cached_stat_values.get(path) | 96 stat_value = cached_stat_values.get(path) |
97 if stat_value is None: | 97 if stat_value is None: |
98 stat_future = self.StatAsync(path) | 98 stat_future = self.StatAsync(path) |
99 if skip_not_found: | 99 if skip_not_found: |
100 stat_future = stat_future.Then(lambda x: x, handle) | 100 stat_future = stat_future.Then(lambda x: x, handle) |
101 else: | 101 else: |
102 stat_future = Future(value=stat_value) | 102 stat_future = Future(value=stat_value) |
103 stat_futures[path] = stat_future | 103 stat_futures[path] = stat_future |
104 | 104 |
105 # Filter only the cached data which is fresh by comparing to the latest | 105 # Filter only the cached data which is fresh by comparing to the latest |
106 # stat. The cached read data includes the cached version. Remove it for | 106 # stat. The cached read data includes the cached version. Remove it for |
107 # the result returned to callers. | 107 # the result returned to callers. |
108 fresh_data = dict( | 108 fresh_data = dict( |
not at google - send to devlin
2014/08/26 20:59:34
This "fresh" thing is tripping me up. Maybe it sho
| |
109 (path, data) for path, (data, version) in cached_read_values.iteritems() | 109 (path, data) for path, (data, version) in cached_read_values.iteritems() |
110 if stat_futures[path].Get().version == version) | 110 if version and stat_futures[path].Get().version == version) |
not at google - send to devlin
2014/08/26 20:59:34
I'm paranoid about version being 0 here. Could you
| |
111 | |
112 if skip_not_found: | |
113 # Remove paths for files that don't exist so reads aren't attempted. | |
not at google - send to devlin
2014/08/26 20:59:34
Could you mention why this works? Namely that this
| |
114 paths = [path for path in paths | |
115 if cached_read_values.get(path, (None, True))[1]] | |
not at google - send to devlin
2014/08/26 20:59:34
Could you use 'path in cached_read_values'? This .
ahernandez
2014/08/27 00:07:22
The reason I have the ugly .get() call is because
not at google - send to devlin
2014/08/27 01:23:55
Ah I see. So to be clear: If the item isn't cached
| |
111 | 116 |
112 if len(fresh_data) == len(paths): | 117 if len(fresh_data) == len(paths): |
113 # Everything was cached and up-to-date. | 118 # Everything was cached and up-to-date. |
114 return Future(value=fresh_data) | 119 return Future(value=fresh_data) |
115 | 120 |
116 def next(new_results): | 121 def next(new_results): |
117 # Update the cache. This is a path -> (data, version) mapping. | 122 # Update the cache. This is a path -> (data, version) mapping. |
118 self._read_object_store.SetMulti( | 123 self._read_object_store.SetMulti( |
119 dict((path, (new_result, stat_futures[path].Get().version)) | 124 dict((path, (new_result, stat_futures[path].Get().version)) |
120 for path, new_result in new_results.iteritems())) | 125 for path, new_result in new_results.iteritems())) |
126 # Update the read cache to include files that weren't found, to prevent | |
127 # constantly trying to read a file we now know doesn't exist. | |
not at google - send to devlin
2014/08/26 20:59:34
Relating to comments above - I'm not sure we need
ahernandez
2014/08/27 00:07:22
I think the stat cache doesn't know about non-exis
not at google - send to devlin
2014/08/27 01:23:55
Ohh I see. Ok. What you have is fine.
We should a
ahernandez
2014/08/27 17:33:28
I don't think we should be afraid of comments :)
| |
128 self._read_object_store.SetMulti( | |
129 dict((path, (None, None)) for path in paths | |
130 if stat_futures[path].Get() is None)) | |
121 new_results.update(fresh_data) | 131 new_results.update(fresh_data) |
122 return new_results | 132 return new_results |
123 # Read in the values that were uncached or old. | 133 # Read in the values that were uncached or old. |
124 return self._file_system.Read(set(paths) - set(fresh_data.iterkeys()), | 134 return self._file_system.Read(set(paths) - set(fresh_data.iterkeys()), |
125 skip_not_found=skip_not_found).Then(next) | 135 skip_not_found=skip_not_found).Then(next) |
126 | 136 |
127 def GetIdentity(self): | 137 def GetIdentity(self): |
128 return self._file_system.GetIdentity() | 138 return self._file_system.GetIdentity() |
129 | 139 |
130 def __repr__(self): | 140 def __repr__(self): |
131 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) | 141 return '%s of <%s>' % (type(self).__name__, repr(self._file_system)) |
OLD | NEW |