Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(422)

Side by Side Diff: client/named_cache.py

Issue 2877483004: Reland "named caches: move instead of symlinking" (Closed)
Patch Set: remove wrong comment Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """This file implements Named Caches.""" 5 """This file implements Named Caches."""
6 6
7 import contextlib 7 import contextlib
8 import logging 8 import logging
9 import optparse 9 import optparse
10 import os 10 import os
(...skipping 10 matching lines...) Expand all
21 # Keep synced with task_request.py 21 # Keep synced with task_request.py
22 CACHE_NAME_RE = re.compile(ur'^[a-z0-9_]{1,4096}$') 22 CACHE_NAME_RE = re.compile(ur'^[a-z0-9_]{1,4096}$')
23 MAX_CACHE_SIZE = 50 23 MAX_CACHE_SIZE = 50
24 24
25 25
26 class Error(Exception): 26 class Error(Exception):
27 """Named cache specific error.""" 27 """Named cache specific error."""
28 28
29 29
30 class CacheManager(object): 30 class CacheManager(object):
31 """Manages cache directories exposed to a task as symlinks. 31 """Manages cache directories exposed to a task.
32 32
33 A task can specify that caches should be present on a bot. A cache is 33 A task can specify that caches should be present on a bot. A cache is
34 tuple (name, path), where 34 tuple (name, path), where
35 name is a short identifier that describes the contents of the cache, e.g. 35 name is a short identifier that describes the contents of the cache, e.g.
36 "git_v8" could be all git repositories required by v8 builds, or 36 "git_v8" could be all git repositories required by v8 builds, or
37 "build_chromium" could be build artefacts of the Chromium. 37 "build_chromium" could be build artefacts of the Chromium.
38 path is a directory path relative to the task run dir. It will be mapped 38 path is a directory path relative to the task run dir. Cache installation
39 to the cache directory persisted on the bot. 39 puts the requested cache directory at the path.
40 """ 40 """
41 41
42 def __init__(self, root_dir): 42 def __init__(self, root_dir):
43 """Initializes NamedCaches. 43 """Initializes NamedCaches.
44 44
45 |root_dir| is a directory for persistent cache storage. 45 |root_dir| is a directory for persistent cache storage.
46 """ 46 """
47 assert isinstance(root_dir, unicode), root_dir
47 assert file_path.isabs(root_dir), root_dir 48 assert file_path.isabs(root_dir), root_dir
48 self.root_dir = unicode(root_dir) 49 self.root_dir = root_dir
49 self._lock = threading_utils.LockWithAssert() 50 self._lock = threading_utils.LockWithAssert()
50 # LRU {cache_name -> cache_location} 51 # LRU {cache_name -> cache_location}
51 # It is saved to |root_dir|/state.json. 52 # It is saved to |root_dir|/state.json.
52 self._lru = None 53 self._lru = None
53 54
54 @contextlib.contextmanager 55 @contextlib.contextmanager
55 def open(self, time_fn=None): 56 def open(self, time_fn=None):
56 """Opens NamedCaches for mutation operations, such as request or trim. 57 """Opens NamedCaches for mutation operations, such as install.
57 58
58 Only on caller can open the cache manager at a time. If the same thread 59 Only one caller can open the cache manager at a time. If the same thread
59 calls this function after opening it earlier, the call will deadlock. 60 calls this function after opening it earlier, the call will deadlock.
60 61
61 time_fn is a function that returns timestamp (float) and is used to take 62 time_fn is a function that returns timestamp (float) and is used to take
62 timestamps when new caches are requested. 63 timestamps when new caches are requested.
63 64
64 Returns a context manager that must be closed as soon as possible. 65 Returns a context manager that must be closed as soon as possible.
65 """ 66 """
66 with self._lock: 67 with self._lock:
67 state_path = os.path.join(self.root_dir, u'state.json') 68 state_path = os.path.join(self.root_dir, u'state.json')
68 assert self._lru is None, 'acquired lock, but self._lru is not None' 69 assert self._lru is None, 'acquired lock, but self._lru is not None'
(...skipping 10 matching lines...) Expand all
79 try: 80 try:
80 yield 81 yield
81 finally: 82 finally:
82 file_path.ensure_tree(self.root_dir) 83 file_path.ensure_tree(self.root_dir)
83 self._lru.save(state_path) 84 self._lru.save(state_path)
84 self._lru = None 85 self._lru = None
85 86
86 def __len__(self): 87 def __len__(self):
87 """Returns number of items in the cache. 88 """Returns number of items in the cache.
88 89
89 Requires NamedCache to be open. 90 NamedCache must be open.
90 """ 91 """
91 return len(self._lru) 92 return len(self._lru)
92 93
93 def request(self, name):
94 """Returns an absolute path to the directory of the named cache.
95
96 Creates a cache directory if it does not exist yet.
97
98 Requires NamedCache to be open.
99 """
100 self._lock.assert_locked()
101 assert isinstance(name, basestring), name
102 path = self._lru.get(name)
103 create_named_link = False
104 if path is None:
105 path = self._allocate_dir()
106 create_named_link = True
107 logging.info('Created %r for %r', path, name)
108 abs_path = os.path.join(self.root_dir, path)
109
110 # TODO(maruel): That's weird, it should exist already.
111 file_path.ensure_tree(abs_path)
112 self._lru.add(name, path)
113
114 if create_named_link:
115 # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
116 # for user convenience.
117 named_path = self._get_named_path(name)
118 if os.path.exists(named_path):
119 file_path.remove(named_path)
120 else:
121 file_path.ensure_tree(os.path.dirname(named_path))
122 logging.info('Symlink %r to %r', named_path, abs_path)
123 fs.symlink(abs_path, named_path)
124
125 return abs_path
126
127 def get_oldest(self): 94 def get_oldest(self):
128 """Returns name of the LRU cache or None. 95 """Returns name of the LRU cache or None.
129 96
130 Requires NamedCache to be open. 97 NamedCache must be open.
131 """ 98 """
132 self._lock.assert_locked() 99 self._lock.assert_locked()
133 try: 100 try:
134 return self._lru.get_oldest()[0] 101 return self._lru.get_oldest()[0]
135 except KeyError: 102 except KeyError:
136 return None 103 return None
137 104
138 def get_timestamp(self, name): 105 def get_timestamp(self, name):
139 """Returns timestamp of last use of an item. 106 """Returns timestamp of last use of an item.
140 107
141 Requires NamedCache to be open. 108 NamedCache must be open.
142 109
143 Raises KeyError if cache is not found. 110 Raises KeyError if cache is not found.
144 """ 111 """
145 self._lock.assert_locked() 112 self._lock.assert_locked()
146 assert isinstance(name, basestring), name 113 assert isinstance(name, basestring), name
147 return self._lru.get_timestamp(name) 114 return self._lru.get_timestamp(name)
148 115
149 @contextlib.contextmanager 116 @property
150 def create_symlinks(self, root, named_caches): 117 def available(self):
151 """Creates symlinks in |root| for the specified named_caches. 118 """Returns a set of names of available caches.
152 119
153 named_caches must be a list of (name, path) tuples. 120 NamedCache must be open.
154
155 Requires NamedCache to be open.
156
157 Raises Error if cannot create a symlink.
158 """ 121 """
159 self._lock.assert_locked() 122 self._lock.assert_locked()
160 for name, path in named_caches: 123 return self._lru.keys_set()
161 logging.info('Named cache %r -> %r', name, path)
162 try:
163 _validate_named_cache_path(path)
164 symlink_path = os.path.abspath(os.path.join(root, path))
165 file_path.ensure_tree(os.path.dirname(symlink_path))
166 requested = self.request(name)
167 logging.info('Symlink %r to %r', symlink_path, requested)
168 fs.symlink(requested, symlink_path)
169 except (OSError, Error) as ex:
170 raise Error(
171 'cannot create a symlink for cache named "%s" at "%s": %s' % (
172 name, symlink_path, ex))
173 124
174 def delete_symlinks(self, root, named_caches): 125 def install(self, path, name):
175 """Deletes symlinks from |root| for the specified named_caches. 126 """Moves the directory for the specified named cache to |path|.
176 127
177 named_caches must be a list of (name, path) tuples. 128 NamedCache must be open. path must be absolute, unicode and must not exist.
129
130 Raises Error if cannot install the cache.
178 """ 131 """
179 for name, path in named_caches: 132 self._lock.assert_locked()
180 logging.info('Unlinking named cache "%s"', name) 133 logging.info('Installing named cache %r to %r', name, path)
181 try: 134 try:
182 _validate_named_cache_path(path) 135 _check_abs(path)
183 symlink_path = os.path.abspath(os.path.join(root, path)) 136 if os.path.isdir(path):
184 fs.unlink(symlink_path) 137 raise Error('installation directory %r already exists' % path)
185 except (OSError, Error) as ex: 138
186 raise Error( 139 rel_cache = self._lru.get(name)
187 'cannot unlink cache named "%s" at "%s": %s' % ( 140 if rel_cache:
188 name, symlink_path, ex)) 141 abs_cache = os.path.join(self.root_dir, rel_cache)
142 if os.path.isdir(abs_cache):
143 logging.info('Moving %r to %r', abs_cache, path)
144 file_path.ensure_tree(os.path.dirname(path))
145 fs.rename(abs_cache, path)
146 self._remove(name)
147 return
148
149 logging.warning('directory for named cache %r does not exist', name)
150 self._remove(name)
151
152 # The named cache does not exist, create an empty directory.
153 # When uninstalling, we will move it back to the cache and create an
154 # entry.
155 file_path.ensure_tree(path)
156 except (OSError, Error) as ex:
157 raise Error(
158 'cannot install cache named %r at %r: %s' % (
159 name, path, ex))
160
161 def uninstall(self, path, name):
162 """Moves the cache directory back. Opposite to install().
163
164 NamedCache must be open. path must be absolute and unicode.
165
166 Raises Error if cannot uninstall the cache.
167 """
168 logging.info('Uninstalling named cache %r from %r', name, path)
169 try:
170 _check_abs(path)
171 if not os.path.isdir(path):
172 logging.warning(
173 'Directory %r does not exist anymore. Cache lost.', path)
174 return
175
176 rel_cache = self._lru.get(name)
177 if rel_cache:
178 # Do not crash because cache already exists.
179 logging.warning('overwriting an existing named cache %r', name)
180 create_named_link = False
181 else:
182 rel_cache = self._allocate_dir()
183 create_named_link = True
184
185 # Move the dir and create an entry for the named cache.
186 abs_cache = os.path.join(self.root_dir, rel_cache)
187 logging.info('Moving %r to %r', path, abs_cache)
188 file_path.ensure_tree(os.path.dirname(abs_cache))
189 fs.rename(path, abs_cache)
190 self._lru.add(name, rel_cache)
191
192 if create_named_link:
193 # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
194 # for user convenience.
195 named_path = self._get_named_path(name)
196 if os.path.exists(named_path):
197 file_path.remove(named_path)
198 else:
199 file_path.ensure_tree(os.path.dirname(named_path))
200 fs.symlink(abs_cache, named_path)
201 logging.info('Created symlink %r to %r', named_path, abs_cache)
202 except (OSError, Error) as ex:
203 raise Error(
204 'cannot uninstall cache named %r at %r: %s' % (
205 name, path, ex))
189 206
190 def trim(self, min_free_space): 207 def trim(self, min_free_space):
191 """Purges cache. 208 """Purges cache.
192 209
193 Removes cache directories that were not accessed for a long time 210 Removes cache directories that were not accessed for a long time
194 until there is enough free space and the number of caches is sane. 211 until there is enough free space and the number of caches is sane.
195 212
196 If min_free_space is None, disk free space is not checked. 213 If min_free_space is None, disk free space is not checked.
197 214
198 Requires NamedCache to be open. 215 NamedCache must be open.
199 216
200 Returns: 217 Returns:
201 Number of caches deleted. 218 Number of caches deleted.
202 """ 219 """
203 self._lock.assert_locked() 220 self._lock.assert_locked()
204 if not os.path.isdir(self.root_dir): 221 if not os.path.isdir(self.root_dir):
205 return 0 222 return 0
206 223
207 total = 0 224 total = 0
208 free_space = 0 225 free_space = 0
209 if min_free_space: 226 if min_free_space:
210 free_space = file_path.get_free_space(self.root_dir) 227 free_space = file_path.get_free_space(self.root_dir)
211 while ((min_free_space and free_space < min_free_space) 228 while ((min_free_space and free_space < min_free_space)
212 or len(self._lru) > MAX_CACHE_SIZE): 229 or len(self._lru) > MAX_CACHE_SIZE):
213 logging.info( 230 logging.info(
214 'Making space for named cache %s > %s or %s > %s', 231 'Making space for named cache %d > %d or %d > %d',
215 free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE) 232 free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE)
216 try: 233 try:
217 name, (path, _) = self._lru.get_oldest() 234 name, _ = self._lru.get_oldest()
218 except KeyError: 235 except KeyError:
219 return total 236 return total
220 named_dir = self._get_named_path(name) 237 logging.info('Removing named cache %r', name)
221 if fs.islink(named_dir): 238 self._remove(name)
222 fs.unlink(named_dir)
223 path_abs = os.path.join(self.root_dir, path)
224 if os.path.isdir(path_abs):
225 logging.info('Removing named cache %s', path_abs)
226 file_path.rmtree(path_abs)
227 if min_free_space: 239 if min_free_space:
228 free_space = file_path.get_free_space(self.root_dir) 240 free_space = file_path.get_free_space(self.root_dir)
229 self._lru.pop(name)
230 total += 1 241 total += 1
231 return total 242 return total
232 243
233 _DIR_ALPHABET = string.ascii_letters + string.digits 244 _DIR_ALPHABET = string.ascii_letters + string.digits
234 245
235 def _allocate_dir(self): 246 def _allocate_dir(self):
236 """Creates and returns relative path of a new cache directory.""" 247 """Creates and returns relative path of a new cache directory."""
237 # We randomly generate directory names that have two lower/upper case 248 # We randomly generate directory names that have two lower/upper case
238 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844. 249 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
239 abc_len = len(self._DIR_ALPHABET) 250 abc_len = len(self._DIR_ALPHABET)
240 tried = set() 251 tried = set()
241 while len(tried) < 1000: 252 while len(tried) < 1000:
242 i = random.randint(0, abc_len * abc_len - 1) 253 i = random.randint(0, abc_len * abc_len - 1)
243 rel_path = ( 254 rel_path = (
244 self._DIR_ALPHABET[i / abc_len] + 255 self._DIR_ALPHABET[i / abc_len] +
245 self._DIR_ALPHABET[i % abc_len]) 256 self._DIR_ALPHABET[i % abc_len])
246 if rel_path in tried: 257 if rel_path in tried:
247 continue 258 continue
248 abs_path = os.path.join(self.root_dir, rel_path) 259 abs_path = os.path.join(self.root_dir, rel_path)
249 if not fs.exists(abs_path): 260 if not fs.exists(abs_path):
250 return rel_path 261 return rel_path
251 tried.add(rel_path) 262 tried.add(rel_path)
252 raise Error('could not allocate a new cache dir, too many cache dirs') 263 raise Error('could not allocate a new cache dir, too many cache dirs')
253 264
265 def _remove(self, name):
266 """Removes a cache directory and entry.
267
268 NamedCache must be open.
269
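270 Does nothing if there is no entry for |name|. Otherwise removes the named
271 symlink and the cache directory, if present, and the entry. Returns nothing.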
272 """
273 self._lock.assert_locked()
274 rel_path = self._lru.get(name)
275 if not rel_path:
276 return
277
278 named_dir = self._get_named_path(name)
279 if fs.islink(named_dir):
280 fs.unlink(named_dir)
281
282 abs_path = os.path.join(self.root_dir, rel_path)
283 if os.path.isdir(abs_path):
284 file_path.rmtree(abs_path)
285 self._lru.pop(name)
286
254 def _get_named_path(self, name): 287 def _get_named_path(self, name):
255 return os.path.join(self.root_dir, 'named', name) 288 return os.path.join(self.root_dir, 'named', name)
256 289
257 290
258 def add_named_cache_options(parser): 291 def add_named_cache_options(parser):
259 group = optparse.OptionGroup(parser, 'Named caches') 292 group = optparse.OptionGroup(parser, 'Named caches')
260 group.add_option( 293 group.add_option(
261 '--named-cache', 294 '--named-cache',
262 dest='named_caches', 295 dest='named_caches',
263 action='append', 296 action='append',
264 nargs=2, 297 nargs=2,
265 default=[], 298 default=[],
266 help='A named cache to request. Accepts two arguments, name and path. ' 299 help='A named cache to request. Accepts two arguments, name and path. '
267 'name identifies the cache, must match regex [a-z0-9_]{1,4096}. ' 300 'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
268 'path is a path relative to the run dir where the cache directory ' 301 'path is a path relative to the run dir where the cache directory '
269 'must be symlinked to. ' 302 'must be put to. '
270 'This option can be specified more than once.') 303 'This option can be specified more than once.')
271 group.add_option( 304 group.add_option(
272 '--named-cache-root', 305 '--named-cache-root',
273 help='Cache root directory. Default=%default') 306 help='Cache root directory. Default=%default')
274 parser.add_option_group(group) 307 parser.add_option_group(group)
275 308
276 309
277 def process_named_cache_options(parser, options): 310 def process_named_cache_options(parser, options):
278 """Validates named cache options and returns a CacheManager.""" 311 """Validates named cache options and returns a CacheManager."""
279 if options.named_caches and not options.named_cache_root: 312 if options.named_caches and not options.named_cache_root:
280 parser.error('--named-cache is specified, but --named-cache-root is empty') 313 parser.error('--named-cache is specified, but --named-cache-root is empty')
281 for name, path in options.named_caches: 314 for name, path in options.named_caches:
282 if not CACHE_NAME_RE.match(name): 315 if not CACHE_NAME_RE.match(name):
283 parser.error( 316 parser.error(
284 'cache name "%s" does not match %s' % (name, CACHE_NAME_RE.pattern)) 317 'cache name %r does not match %r' % (name, CACHE_NAME_RE.pattern))
285 if not path: 318 if not path:
286 parser.error('cache path cannot be empty') 319 parser.error('cache path cannot be empty')
287 if options.named_cache_root: 320 if options.named_cache_root:
288 return CacheManager(os.path.abspath(options.named_cache_root)) 321 return CacheManager(unicode(os.path.abspath(options.named_cache_root)))
289 return None 322 return None
290 323
291 324
292 def _validate_named_cache_path(path): 325 def _check_abs(path):
293 if os.path.isabs(path): 326 if not isinstance(path, unicode):
294 raise Error('named cache path must not be absolute') 327 raise Error('named cache installation path must be unicode')
295 if '..' in path.split(os.path.sep): 328 if not os.path.isabs(path):
296 raise Error('named cache path must not contain ".."') 329 raise Error('named cache installation path must be absolute')
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698