OLD | NEW |
1 # Copyright 2016 The LUCI Authors. All rights reserved. | 1 # Copyright 2016 The LUCI Authors. All rights reserved. |
2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 |
3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. |
4 | 4 |
5 """This file implements Named Caches.""" | 5 """This file implements Named Caches.""" |
6 | 6 |
7 import contextlib | 7 import contextlib |
8 import logging | 8 import logging |
9 import optparse | 9 import optparse |
10 import os | 10 import os |
(...skipping 10 matching lines...) Expand all Loading... |
21 # Keep synced with task_request.py | 21 # Keep synced with task_request.py |
22 CACHE_NAME_RE = re.compile(ur'^[a-z0-9_]{1,4096}$') | 22 CACHE_NAME_RE = re.compile(ur'^[a-z0-9_]{1,4096}$') |
23 MAX_CACHE_SIZE = 50 | 23 MAX_CACHE_SIZE = 50 |
24 | 24 |
25 | 25 |
26 class Error(Exception): | 26 class Error(Exception): |
27 """Named cache specific error.""" | 27 """Named cache specific error.""" |
28 | 28 |
29 | 29 |
30 class CacheManager(object): | 30 class CacheManager(object): |
31 """Manages cache directories exposed to a task as symlinks. | 31 """Manages cache directories exposed to a task. |
32 | 32 |
33 A task can specify that caches should be present on a bot. A cache is | 33 A task can specify that caches should be present on a bot. A cache is |
34 a tuple (name, path), where | 34 a tuple (name, path), where |
35 name is a short identifier that describes the contents of the cache, e.g. | 35 name is a short identifier that describes the contents of the cache, e.g. |
36 "git_v8" could be all git repositories required by v8 builds, or | 36 "git_v8" could be all git repositories required by v8 builds, or |
37 "build_chromium" could be build artefacts of Chromium. | 37 "build_chromium" could be build artefacts of Chromium. |
38 path is a directory path relative to the task run dir. It will be mapped | 38 path is a directory path relative to the task run dir. Cache installation |
39 to the cache directory persisted on the bot. | 39 puts the requested cache directory at the path. |
40 """ | 40 """ |
41 | 41 |
42 def __init__(self, root_dir): | 42 def __init__(self, root_dir): |
43 """Initializes NamedCaches. | 43 """Initializes NamedCaches. |
44 | 44 |
45 |root_dir| is a directory for persistent cache storage. | 45 |root_dir| is a directory for persistent cache storage. |
46 """ | 46 """ |
| 47 assert isinstance(root_dir, unicode), root_dir |
47 assert file_path.isabs(root_dir), root_dir | 48 assert file_path.isabs(root_dir), root_dir |
48 self.root_dir = unicode(root_dir) | 49 self.root_dir = root_dir |
49 self._lock = threading_utils.LockWithAssert() | 50 self._lock = threading_utils.LockWithAssert() |
50 # LRU {cache_name -> cache_location} | 51 # LRU {cache_name -> cache_location} |
51 # It is saved to |root_dir|/state.json. | 52 # It is saved to |root_dir|/state.json. |
52 self._lru = None | 53 self._lru = None |
53 | 54 |
54 @contextlib.contextmanager | 55 @contextlib.contextmanager |
55 def open(self, time_fn=None): | 56 def open(self, time_fn=None): |
56 """Opens NamedCaches for mutation operations, such as request or trim. | 57 """Opens NamedCaches for mutation operations, such as install. |
57 | 58 |
58 Only on caller can open the cache manager at a time. If the same thread | 59 Only one caller can open the cache manager at a time. If the same thread |
59 calls this function after opening it earlier, the call will deadlock. | 60 calls this function after opening it earlier, the call will deadlock. |
60 | 61 |
61 time_fn is a function that returns a timestamp (float) and is used to take | 62 time_fn is a function that returns a timestamp (float) and is used to take |
62 timestamps when new caches are requested. | 63 timestamps when new caches are requested. |
63 | 64 |
64 Returns a context manager that must be closed as soon as possible. | 65 Returns a context manager that must be closed as soon as possible. |
65 """ | 66 """ |
66 with self._lock: | 67 with self._lock: |
67 state_path = os.path.join(self.root_dir, u'state.json') | 68 state_path = os.path.join(self.root_dir, u'state.json') |
68 assert self._lru is None, 'acquired lock, but self._lru is not None' | 69 assert self._lru is None, 'acquired lock, but self._lru is not None' |
(...skipping 10 matching lines...) Expand all Loading... |
79 try: | 80 try: |
80 yield | 81 yield |
81 finally: | 82 finally: |
82 file_path.ensure_tree(self.root_dir) | 83 file_path.ensure_tree(self.root_dir) |
83 self._lru.save(state_path) | 84 self._lru.save(state_path) |
84 self._lru = None | 85 self._lru = None |
85 | 86 |
86 def __len__(self): | 87 def __len__(self): |
87 """Returns number of items in the cache. | 88 """Returns number of items in the cache. |
88 | 89 |
89 Requires NamedCache to be open. | 90 NamedCache must be open. |
90 """ | 91 """ |
91 return len(self._lru) | 92 return len(self._lru) |
92 | 93 |
93 def request(self, name): | |
94 """Returns an absolute path to the directory of the named cache. | |
95 | |
96 Creates a cache directory if it does not exist yet. | |
97 | |
98 Requires NamedCache to be open. | |
99 """ | |
100 self._lock.assert_locked() | |
101 assert isinstance(name, basestring), name | |
102 path = self._lru.get(name) | |
103 create_named_link = False | |
104 if path is None: | |
105 path = self._allocate_dir() | |
106 create_named_link = True | |
107 logging.info('Created %r for %r', path, name) | |
108 abs_path = os.path.join(self.root_dir, path) | |
109 | |
110 # TODO(maruel): That's weird, it should exist already. | |
111 file_path.ensure_tree(abs_path) | |
112 self._lru.add(name, path) | |
113 | |
114 if create_named_link: | |
115 # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name> | |
116 # for user convenience. | |
117 named_path = self._get_named_path(name) | |
118 if os.path.exists(named_path): | |
119 file_path.remove(named_path) | |
120 else: | |
121 file_path.ensure_tree(os.path.dirname(named_path)) | |
122 logging.info('Symlink %r to %r', named_path, abs_path) | |
123 fs.symlink(abs_path, named_path) | |
124 | |
125 return abs_path | |
126 | |
127 def get_oldest(self): | 94 def get_oldest(self): |
128 """Returns name of the LRU cache or None. | 95 """Returns name of the LRU cache or None. |
129 | 96 |
130 Requires NamedCache to be open. | 97 NamedCache must be open. |
131 """ | 98 """ |
132 self._lock.assert_locked() | 99 self._lock.assert_locked() |
133 try: | 100 try: |
134 return self._lru.get_oldest()[0] | 101 return self._lru.get_oldest()[0] |
135 except KeyError: | 102 except KeyError: |
136 return None | 103 return None |
137 | 104 |
138 def get_timestamp(self, name): | 105 def get_timestamp(self, name): |
139 """Returns timestamp of last use of an item. | 106 """Returns timestamp of last use of an item. |
140 | 107 |
141 Requires NamedCache to be open. | 108 NamedCache must be open. |
142 | 109 |
143 Raises KeyError if cache is not found. | 110 Raises KeyError if cache is not found. |
144 """ | 111 """ |
145 self._lock.assert_locked() | 112 self._lock.assert_locked() |
146 assert isinstance(name, basestring), name | 113 assert isinstance(name, basestring), name |
147 return self._lru.get_timestamp(name) | 114 return self._lru.get_timestamp(name) |
148 | 115 |
149 @contextlib.contextmanager | 116 @property |
150 def create_symlinks(self, root, named_caches): | 117 def available(self): |
151 """Creates symlinks in |root| for the specified named_caches. | 118 """Returns a set of names of available caches. |
152 | 119 |
153 named_caches must be a list of (name, path) tuples. | 120 NamedCache must be open. |
154 | |
155 Requires NamedCache to be open. | |
156 | |
157 Raises Error if cannot create a symlink. | |
158 """ | 121 """ |
159 self._lock.assert_locked() | 122 self._lock.assert_locked() |
160 for name, path in named_caches: | 123 return self._lru.keys_set() |
161 logging.info('Named cache %r -> %r', name, path) | |
162 try: | |
163 _validate_named_cache_path(path) | |
164 symlink_path = os.path.abspath(os.path.join(root, path)) | |
165 file_path.ensure_tree(os.path.dirname(symlink_path)) | |
166 requested = self.request(name) | |
167 logging.info('Symlink %r to %r', symlink_path, requested) | |
168 fs.symlink(requested, symlink_path) | |
169 except (OSError, Error) as ex: | |
170 raise Error( | |
171 'cannot create a symlink for cache named "%s" at "%s": %s' % ( | |
172 name, symlink_path, ex)) | |
173 | 124 |
174 def delete_symlinks(self, root, named_caches): | 125 def install(self, path, name): |
175 """Deletes symlinks from |root| for the specified named_caches. | 126 """Moves the directory for the specified named cache to |path|. |
176 | 127 |
177 named_caches must be a list of (name, path) tuples. | 128 NamedCache must be open. path must be absolute, unicode and must not exist. |
| 129 |
| 130 Raises Error if cannot install the cache. |
178 """ | 131 """ |
179 for name, path in named_caches: | 132 self._lock.assert_locked() |
180 logging.info('Unlinking named cache "%s"', name) | 133 logging.info('Installing named cache %r to %r', name, path) |
181 try: | 134 try: |
182 _validate_named_cache_path(path) | 135 _check_abs(path) |
183 symlink_path = os.path.abspath(os.path.join(root, path)) | 136 if os.path.isdir(path): |
184 fs.unlink(symlink_path) | 137 raise Error('installation directory %r already exists' % path) |
185 except (OSError, Error) as ex: | 138 |
186 raise Error( | 139 rel_cache = self._lru.get(name) |
187 'cannot unlink cache named "%s" at "%s": %s' % ( | 140 if rel_cache: |
188 name, symlink_path, ex)) | 141 abs_cache = os.path.join(self.root_dir, rel_cache) |
| 142 if os.path.isdir(abs_cache): |
| 143 logging.info('Moving %r to %r', abs_cache, path) |
| 144 file_path.ensure_tree(os.path.dirname(path)) |
| 145 fs.rename(abs_cache, path) |
| 146 self._remove(name) |
| 147 return |
| 148 |
| 149 logging.warning('directory for named cache %r does not exist', name) |
| 150 self._remove(name) |
| 151 |
| 152 # The named cache does not exist, create an empty directory. |
| 153 # When uninstalling, we will move it back to the cache and create an |
| 154 # entry. |
| 155 file_path.ensure_tree(path) |
| 156 except (OSError, Error) as ex: |
| 157 raise Error( |
| 158 'cannot install cache named %r at %r: %s' % ( |
| 159 name, path, ex)) |
| 160 |
| 161 def uninstall(self, path, name): |
| 162 """Moves the cache directory back. Opposite to install(). |
| 163 |
| 164 NamedCache must be open. path must be absolute and unicode. |
| 165 |
| 166 Raises Error if cannot uninstall the cache. |
| 167 """ |
| 168 logging.info('Uninstalling named cache %r from %r', name, path) |
| 169 try: |
| 170 _check_abs(path) |
| 171 if not os.path.isdir(path): |
| 172 logging.warning( |
| 173 'Directory %r does not exist anymore. Cache lost.', path) |
| 174 return |
| 175 |
| 176 rel_cache = self._lru.get(name) |
| 177 if rel_cache: |
| 178 # Do not crash because cache already exists. |
| 179 logging.warning('overwriting an existing named cache %r', name) |
| 180 create_named_link = False |
| 181 else: |
| 182 rel_cache = self._allocate_dir() |
| 183 create_named_link = True |
| 184 |
| 185 # Move the dir and create an entry for the named cache. |
| 186 abs_cache = os.path.join(self.root_dir, rel_cache) |
| 187 logging.info('Moving %r to %r', path, abs_cache) |
| 188 file_path.ensure_tree(os.path.dirname(abs_cache)) |
| 189 fs.rename(path, abs_cache) |
| 190 self._lru.add(name, rel_cache) |
| 191 |
| 192 if create_named_link: |
| 193 # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name> |
| 194 # for user convenience. |
| 195 named_path = self._get_named_path(name) |
| 196 if os.path.exists(named_path): |
| 197 file_path.remove(named_path) |
| 198 else: |
| 199 file_path.ensure_tree(os.path.dirname(named_path)) |
| 200 fs.symlink(abs_cache, named_path) |
| 201 logging.info('Created symlink %r to %r', named_path, abs_cache) |
| 202 except (OSError, Error) as ex: |
| 203 raise Error( |
| 204 'cannot uninstall cache named %r at %r: %s' % ( |
| 205 name, path, ex)) |
189 | 206 |
190 def trim(self, min_free_space): | 207 def trim(self, min_free_space): |
191 """Purges cache. | 208 """Purges cache. |
192 | 209 |
193 Removes cache directories that were not accessed for a long time | 210 Removes cache directories that were not accessed for a long time |
194 until there is enough free space and the number of caches is sane. | 211 until there is enough free space and the number of caches is sane. |
195 | 212 |
196 If min_free_space is None, disk free space is not checked. | 213 If min_free_space is None, disk free space is not checked. |
197 | 214 |
198 Requires NamedCache to be open. | 215 NamedCache must be open. |
199 | 216 |
200 Returns: | 217 Returns: |
201 Number of caches deleted. | 218 Number of caches deleted. |
202 """ | 219 """ |
203 self._lock.assert_locked() | 220 self._lock.assert_locked() |
204 if not os.path.isdir(self.root_dir): | 221 if not os.path.isdir(self.root_dir): |
205 return 0 | 222 return 0 |
206 | 223 |
207 total = 0 | 224 total = 0 |
208 free_space = 0 | 225 free_space = 0 |
209 if min_free_space: | 226 if min_free_space: |
210 free_space = file_path.get_free_space(self.root_dir) | 227 free_space = file_path.get_free_space(self.root_dir) |
211 while ((min_free_space and free_space < min_free_space) | 228 while ((min_free_space and free_space < min_free_space) |
212 or len(self._lru) > MAX_CACHE_SIZE): | 229 or len(self._lru) > MAX_CACHE_SIZE): |
213 logging.info( | 230 logging.info( |
214 'Making space for named cache %s > %s or %s > %s', | 231 'Making space for named cache %d > %d or %d > %d', |
215 free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE) | 232 free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE) |
216 try: | 233 try: |
217 name, (path, _) = self._lru.get_oldest() | 234 name, _ = self._lru.get_oldest() |
218 except KeyError: | 235 except KeyError: |
219 return total | 236 return total |
220 named_dir = self._get_named_path(name) | 237 logging.info('Removing named cache %r', name) |
221 if fs.islink(named_dir): | 238 self._remove(name) |
222 fs.unlink(named_dir) | |
223 path_abs = os.path.join(self.root_dir, path) | |
224 if os.path.isdir(path_abs): | |
225 logging.info('Removing named cache %s', path_abs) | |
226 file_path.rmtree(path_abs) | |
227 if min_free_space: | 239 if min_free_space: |
228 free_space = file_path.get_free_space(self.root_dir) | 240 free_space = file_path.get_free_space(self.root_dir) |
229 self._lru.pop(name) | |
230 total += 1 | 241 total += 1 |
231 return total | 242 return total |
232 | 243 |
233 _DIR_ALPHABET = string.ascii_letters + string.digits | 244 _DIR_ALPHABET = string.ascii_letters + string.digits |
234 | 245 |
235 def _allocate_dir(self): | 246 def _allocate_dir(self): |
236 """Creates and returns relative path of a new cache directory.""" | 247 """Creates and returns relative path of a new cache directory.""" |
237 # We randomly generate directory names that have two lower/upper case | 248 # We randomly generate directory names that have two lower/upper case |
238 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844. | 249 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844. |
239 abc_len = len(self._DIR_ALPHABET) | 250 abc_len = len(self._DIR_ALPHABET) |
240 tried = set() | 251 tried = set() |
241 while len(tried) < 1000: | 252 while len(tried) < 1000: |
242 i = random.randint(0, abc_len * abc_len - 1) | 253 i = random.randint(0, abc_len * abc_len - 1) |
243 rel_path = ( | 254 rel_path = ( |
244 self._DIR_ALPHABET[i / abc_len] + | 255 self._DIR_ALPHABET[i / abc_len] + |
245 self._DIR_ALPHABET[i % abc_len]) | 256 self._DIR_ALPHABET[i % abc_len]) |
246 if rel_path in tried: | 257 if rel_path in tried: |
247 continue | 258 continue |
248 abs_path = os.path.join(self.root_dir, rel_path) | 259 abs_path = os.path.join(self.root_dir, rel_path) |
249 if not fs.exists(abs_path): | 260 if not fs.exists(abs_path): |
250 return rel_path | 261 return rel_path |
251 tried.add(rel_path) | 262 tried.add(rel_path) |
252 raise Error('could not allocate a new cache dir, too many cache dirs') | 263 raise Error('could not allocate a new cache dir, too many cache dirs') |
253 | 264 |
| 265 def _remove(self, name): |
| 266 """Removes a cache directory and entry. |
| 267 |
| 268 NamedCache must be open. |
| 269 |
| 270 Returns: |
| 271 None. Does nothing if there is no entry for the named cache. |
| 272 """ |
| 273 self._lock.assert_locked() |
| 274 rel_path = self._lru.get(name) |
| 275 if not rel_path: |
| 276 return |
| 277 |
| 278 named_dir = self._get_named_path(name) |
| 279 if fs.islink(named_dir): |
| 280 fs.unlink(named_dir) |
| 281 |
| 282 abs_path = os.path.join(self.root_dir, rel_path) |
| 283 if os.path.isdir(abs_path): |
| 284 file_path.rmtree(abs_path) |
| 285 self._lru.pop(name) |
| 286 |
254 def _get_named_path(self, name): | 287 def _get_named_path(self, name): |
255 return os.path.join(self.root_dir, 'named', name) | 288 return os.path.join(self.root_dir, 'named', name) |
256 | 289 |
257 | 290 |
258 def add_named_cache_options(parser): | 291 def add_named_cache_options(parser): |
259 group = optparse.OptionGroup(parser, 'Named caches') | 292 group = optparse.OptionGroup(parser, 'Named caches') |
260 group.add_option( | 293 group.add_option( |
261 '--named-cache', | 294 '--named-cache', |
262 dest='named_caches', | 295 dest='named_caches', |
263 action='append', | 296 action='append', |
264 nargs=2, | 297 nargs=2, |
265 default=[], | 298 default=[], |
266 help='A named cache to request. Accepts two arguments, name and path. ' | 299 help='A named cache to request. Accepts two arguments, name and path. ' |
267 'name identifies the cache, must match regex [a-z0-9_]{1,4096}. ' | 300 'name identifies the cache, must match regex [a-z0-9_]{1,4096}. ' |
268 'path is a path relative to the run dir where the cache directory ' | 301 'path is a path relative to the run dir where the cache directory ' |
269 'must be symlinked to. ' | 302 'must be put to. ' |
270 'This option can be specified more than once.') | 303 'This option can be specified more than once.') |
271 group.add_option( | 304 group.add_option( |
272 '--named-cache-root', | 305 '--named-cache-root', |
273 help='Cache root directory. Default=%default') | 306 help='Cache root directory. Default=%default') |
274 parser.add_option_group(group) | 307 parser.add_option_group(group) |
275 | 308 |
276 | 309 |
277 def process_named_cache_options(parser, options): | 310 def process_named_cache_options(parser, options): |
278 """Validates named cache options and returns a CacheManager.""" | 311 """Validates named cache options and returns a CacheManager.""" |
279 if options.named_caches and not options.named_cache_root: | 312 if options.named_caches and not options.named_cache_root: |
280 parser.error('--named-cache is specified, but --named-cache-root is empty') | 313 parser.error('--named-cache is specified, but --named-cache-root is empty') |
281 for name, path in options.named_caches: | 314 for name, path in options.named_caches: |
282 if not CACHE_NAME_RE.match(name): | 315 if not CACHE_NAME_RE.match(name): |
283 parser.error( | 316 parser.error( |
284 'cache name "%s" does not match %s' % (name, CACHE_NAME_RE.pattern)) | 317 'cache name %r does not match %r' % (name, CACHE_NAME_RE.pattern)) |
285 if not path: | 318 if not path: |
286 parser.error('cache path cannot be empty') | 319 parser.error('cache path cannot be empty') |
287 if options.named_cache_root: | 320 if options.named_cache_root: |
288 return CacheManager(os.path.abspath(options.named_cache_root)) | 321 return CacheManager(unicode(os.path.abspath(options.named_cache_root))) |
289 return None | 322 return None |
290 | 323 |
291 | 324 |
292 def _validate_named_cache_path(path): | 325 def _check_abs(path): |
293 if os.path.isabs(path): | 326 if not isinstance(path, unicode): |
294 raise Error('named cache path must not be absolute') | 327 raise Error('named cache installation path must be unicode') |
295 if '..' in path.split(os.path.sep): | 328 if not os.path.isabs(path): |
296 raise Error('named cache path must not contain ".."') | 329 raise Error('named cache installation path must be absolute') |
OLD | NEW |