Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: client/named_cache.py

Issue 2875113002: Revert "named caches: move instead of symlinking" (Closed)
Patch Set: Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | client/run_isolated.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """This file implements Named Caches.""" 5 """This file implements Named Caches."""
6 6
7 import contextlib 7 import contextlib
8 import logging 8 import logging
9 import optparse 9 import optparse
10 import os 10 import os
(...skipping 10 matching lines...) Expand all
21 # Keep synced with task_request.py 21 # Keep synced with task_request.py
22 CACHE_NAME_RE = re.compile(ur'^[a-z0-9_]{1,4096}$') 22 CACHE_NAME_RE = re.compile(ur'^[a-z0-9_]{1,4096}$')
23 MAX_CACHE_SIZE = 50 23 MAX_CACHE_SIZE = 50
24 24
25 25
26 class Error(Exception): 26 class Error(Exception):
27 """Named cache specific error.""" 27 """Named cache specific error."""
28 28
29 29
30 class CacheManager(object): 30 class CacheManager(object):
31 """Manages cache directories exposed to a task. 31 """Manages cache directories exposed to a task as symlinks.
32 32
33 A task can specify that caches should be present on a bot. A cache is 33 A task can specify that caches should be present on a bot. A cache is
34 tuple (name, path), where 34 tuple (name, path), where
35 name is a short identifier that describes the contents of the cache, e.g. 35 name is a short identifier that describes the contents of the cache, e.g.
36 "git_v8" could be all git repositories required by v8 builds, or 36 "git_v8" could be all git repositories required by v8 builds, or
37 "build_chromium" could be build artefacts of the Chromium. 37 "build_chromium" could be build artefacts of the Chromium.
38 path is a directory path relative to the task run dir. Cache installation 38 path is a directory path relative to the task run dir. It will be mapped
39 puts the requested cache directory at the path. 39 to the cache directory persisted on the bot.
40 """ 40 """
41 41
42 def __init__(self, root_dir): 42 def __init__(self, root_dir):
43 """Initializes NamedCaches. 43 """Initializes NamedCaches.
44 44
45 |root_dir| is a directory for persistent cache storage. 45 |root_dir| is a directory for persistent cache storage.
46 """ 46 """
47 assert isinstance(root_dir, unicode), root_dir
48 assert file_path.isabs(root_dir), root_dir 47 assert file_path.isabs(root_dir), root_dir
49 self.root_dir = root_dir 48 self.root_dir = unicode(root_dir)
50 self._lock = threading_utils.LockWithAssert() 49 self._lock = threading_utils.LockWithAssert()
51 # LRU {cache_name -> cache_location} 50 # LRU {cache_name -> cache_location}
52 # It is saved to |root_dir|/state.json. 51 # It is saved to |root_dir|/state.json.
53 self._lru = None 52 self._lru = None
54 53
55 @contextlib.contextmanager 54 @contextlib.contextmanager
56 def open(self, time_fn=None): 55 def open(self, time_fn=None):
57 """Opens NamedCaches for mutation operations, such as install. 56 """Opens NamedCaches for mutation operations, such as request or trim.
58 57
59 Only one caller can open the cache manager at a time. If the same thread 58 Only one caller can open the cache manager at a time. If the same thread
60 calls this function after opening it earlier, the call will deadlock. 59 calls this function after opening it earlier, the call will deadlock.
61 60
62 time_fn is a function that returns timestamp (float) and used to take 61 time_fn is a function that returns timestamp (float) and used to take
63 timestamps when new caches are requested. 62 timestamps when new caches are requested.
64 63
65 Returns a context manager that must be closed as soon as possible. 64 Returns a context manager that must be closed as soon as possible.
66 """ 65 """
67 with self._lock: 66 with self._lock:
68 state_path = os.path.join(self.root_dir, u'state.json') 67 state_path = os.path.join(self.root_dir, u'state.json')
69 assert self._lru is None, 'acquired lock, but self._lru is not None' 68 assert self._lru is None, 'acquired lock, but self._lru is not None'
(...skipping 10 matching lines...) Expand all
80 try: 79 try:
81 yield 80 yield
82 finally: 81 finally:
83 file_path.ensure_tree(self.root_dir) 82 file_path.ensure_tree(self.root_dir)
84 self._lru.save(state_path) 83 self._lru.save(state_path)
85 self._lru = None 84 self._lru = None
86 85
87 def __len__(self): 86 def __len__(self):
88 """Returns number of items in the cache. 87 """Returns number of items in the cache.
89 88
90 NamedCache must be open. 89 Requires NamedCache to be open.
91 """ 90 """
92 return len(self._lru) 91 return len(self._lru)
93 92
93 def request(self, name):
94 """Returns an absolute path to the directory of the named cache.
95
96 Creates a cache directory if it does not exist yet.
97
98 Requires NamedCache to be open.
99 """
100 self._lock.assert_locked()
101 assert isinstance(name, basestring), name
102 path = self._lru.get(name)
103 create_named_link = False
104 if path is None:
105 path = self._allocate_dir()
106 create_named_link = True
107 logging.info('Created %r for %r', path, name)
108 abs_path = os.path.join(self.root_dir, path)
109
110 # TODO(maruel): That's weird, it should exist already.
111 file_path.ensure_tree(abs_path)
112 self._lru.add(name, path)
113
114 if create_named_link:
115 # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
116 # for user convenience.
117 named_path = self._get_named_path(name)
118 if os.path.exists(named_path):
119 file_path.remove(named_path)
120 else:
121 file_path.ensure_tree(os.path.dirname(named_path))
122 logging.info('Symlink %r to %r', named_path, abs_path)
123 fs.symlink(abs_path, named_path)
124
125 return abs_path
126
94 def get_oldest(self): 127 def get_oldest(self):
95 """Returns name of the LRU cache or None. 128 """Returns name of the LRU cache or None.
96 129
97 NamedCache must be open. 130 Requires NamedCache to be open.
98 """ 131 """
99 self._lock.assert_locked() 132 self._lock.assert_locked()
100 try: 133 try:
101 return self._lru.get_oldest()[0] 134 return self._lru.get_oldest()[0]
102 except KeyError: 135 except KeyError:
103 return None 136 return None
104 137
105 def get_timestamp(self, name): 138 def get_timestamp(self, name):
106 """Returns timestamp of last use of an item. 139 """Returns timestamp of last use of an item.
107 140
108 NamedCache must be open. 141 Requires NamedCache to be open.
109 142
110 Raises KeyError if cache is not found. 143 Raises KeyError if cache is not found.
111 """ 144 """
112 self._lock.assert_locked() 145 self._lock.assert_locked()
113 assert isinstance(name, basestring), name 146 assert isinstance(name, basestring), name
114 return self._lru.get_timestamp(name) 147 return self._lru.get_timestamp(name)
115 148
116 @property 149 @contextlib.contextmanager
117 def available(self): 150 def create_symlinks(self, root, named_caches):
118 """Returns a set of names of available caches. 151 """Creates symlinks in |root| for the specified named_caches.
119 152
120 NamedCache must be open. 153 named_caches must be a list of (name, path) tuples.
154
155 Requires NamedCache to be open.
156
157 Raises Error if cannot create a symlink.
121 """ 158 """
122 self._lock.assert_locked() 159 self._lock.assert_locked()
123 return self._lru.keys_set() 160 for name, path in named_caches:
161 logging.info('Named cache %r -> %r', name, path)
162 try:
163 _validate_named_cache_path(path)
164 symlink_path = os.path.abspath(os.path.join(root, path))
165 file_path.ensure_tree(os.path.dirname(symlink_path))
166 requested = self.request(name)
167 logging.info('Symlink %r to %r', symlink_path, requested)
168 fs.symlink(requested, symlink_path)
169 except (OSError, Error) as ex:
170 raise Error(
171 'cannot create a symlink for cache named "%s" at "%s": %s' % (
172 name, symlink_path, ex))
124 173
125 def install(self, path, name): 174 def delete_symlinks(self, root, named_caches):
126 """Moves the directory for the specified named cache to |path|. 175 """Deletes symlinks from |root| for the specified named_caches.
127 176
128 NamedCache must be open. path must be absolute, unicode and must not exist. 177 named_caches must be a list of (name, path) tuples.
129
130 Raises Error if cannot install the cache.
131 """ 178 """
132 self._lock.assert_locked() 179 for name, path in named_caches:
133 logging.info('Installing named cache %r to %r', name, path) 180 logging.info('Unlinking named cache "%s"', name)
134 try: 181 try:
135 _check_abs(path) 182 _validate_named_cache_path(path)
136 if os.path.isdir(path): 183 symlink_path = os.path.abspath(os.path.join(root, path))
137 raise Error('installation directory %r already exists' % path) 184 fs.unlink(symlink_path)
138 185 except (OSError, Error) as ex:
139 rel_cache = self._lru.get(name) 186 raise Error(
140 if rel_cache: 187 'cannot unlink cache named "%s" at "%s": %s' % (
141 abs_cache = os.path.join(self.root_dir, rel_cache) 188 name, symlink_path, ex))
142 if os.path.isdir(abs_cache):
143 logging.info('Moving %r to %r', abs_cache, path)
144 file_path.ensure_tree(os.path.dirname(path))
145 fs.rename(abs_cache, path)
146 self._remove(name)
147 return
148
149 logging.warning('directory for named cache %r does not exist', name)
150 self._remove(name)
151
152 # The named cache does not exist, create an empty directory.
153 # When uninstalling, we will move it back to the cache and create an
154 # entry.
155 file_path.ensure_tree(path)
156 except (OSError, Error) as ex:
157 raise Error(
158 'cannot install cache named %r at %r: %s' % (
159 name, path, ex))
160
161 def uninstall(self, path, name):
162 """Moves the cache directory back. Opposite to install().
163
164 NamedCache must be open. path must be absolute and unicode.
165
166 Raises Error if cannot uninstall the cache.
167 """
168 logging.info('Uninstalling named cache %r from %r', name, path)
169 try:
170 _check_abs(path)
171 if not os.path.isdir(path):
172 logging.warning(
173 'Directory %r does not exist anymore. Cache lost.', path)
174 return
175
176 rel_cache = self._lru.get(name)
177 if rel_cache:
178 # Do not crash because cache already exists.
179 logging.warning('overwriting an existing named cache %r', name)
180 create_named_link = False
181 else:
182 rel_cache = self._allocate_dir()
183 create_named_link = True
184
185 # Move the dir and create an entry for the named cache.
186 abs_cache = os.path.join(self.root_dir, rel_cache)
187 logging.info('Moving %r to %r', path, abs_cache)
188 file_path.ensure_tree(os.path.dirname(abs_cache))
189 fs.rename(path, abs_cache)
190 self._lru.add(name, rel_cache)
191
192 if create_named_link:
193 # Create symlink <root_dir>/<named>/<name> -> <root_dir>/<short name>
194 # for user convenience.
195 named_path = self._get_named_path(name)
196 if os.path.exists(named_path):
197 file_path.remove(named_path)
198 else:
199 file_path.ensure_tree(os.path.dirname(named_path))
200 fs.symlink(abs_cache, named_path)
201 logging.info('Created symlink %r to %r', named_path, abs_cache)
202 except (OSError, Error) as ex:
203 raise Error(
204 'cannot uninstall cache named %r at %r: %s' % (
205 name, path, ex))
206 189
207 def trim(self, min_free_space): 190 def trim(self, min_free_space):
208 """Purges cache. 191 """Purges cache.
209 192
210 Removes cache directories that were not accessed for a long time 193 Removes cache directories that were not accessed for a long time
211 until there is enough free space and the number of caches is sane. 194 until there is enough free space and the number of caches is sane.
212 195
213 If min_free_space is None, disk free space is not checked. 196 If min_free_space is None, disk free space is not checked.
214 197
215 NamedCache must be open. 198 Requires NamedCache to be open.
216 199
217 Returns: 200 Returns:
218 Number of caches deleted. 201 Number of caches deleted.
219 """ 202 """
220 self._lock.assert_locked() 203 self._lock.assert_locked()
221 if not os.path.isdir(self.root_dir): 204 if not os.path.isdir(self.root_dir):
222 return 0 205 return 0
223 206
224 total = 0 207 total = 0
225 free_space = 0 208 free_space = 0
226 if min_free_space: 209 if min_free_space:
227 free_space = file_path.get_free_space(self.root_dir) 210 free_space = file_path.get_free_space(self.root_dir)
228 while ((min_free_space and free_space < min_free_space) 211 while ((min_free_space and free_space < min_free_space)
229 or len(self._lru) > MAX_CACHE_SIZE): 212 or len(self._lru) > MAX_CACHE_SIZE):
230 logging.info( 213 logging.info(
231 'Making space for named cache %d > %d or %d > %d', 214 'Making space for named cache %s > %s or %s > %s',
232 free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE) 215 free_space, min_free_space, len(self._lru), MAX_CACHE_SIZE)
233 try: 216 try:
234 name, _ = self._lru.get_oldest() 217 name, (path, _) = self._lru.get_oldest()
235 except KeyError: 218 except KeyError:
236 return total 219 return total
237 logging.info('Removing named cache %r', name) 220 named_dir = self._get_named_path(name)
238 self._remove(name) 221 if fs.islink(named_dir):
222 fs.unlink(named_dir)
223 path_abs = os.path.join(self.root_dir, path)
224 if os.path.isdir(path_abs):
225 logging.info('Removing named cache %s', path_abs)
226 file_path.rmtree(path_abs)
239 if min_free_space: 227 if min_free_space:
240 free_space = file_path.get_free_space(self.root_dir) 228 free_space = file_path.get_free_space(self.root_dir)
229 self._lru.pop(name)
241 total += 1 230 total += 1
242 return total 231 return total
243 232
244 _DIR_ALPHABET = string.ascii_letters + string.digits 233 _DIR_ALPHABET = string.ascii_letters + string.digits
245 234
246 def _allocate_dir(self): 235 def _allocate_dir(self):
247 """Creates and returns relative path of a new cache directory.""" 236 """Creates and returns relative path of a new cache directory."""
248 # We randomly generate directory names that have two lower/upper case 237 # We randomly generate directory names that have two lower/upper case
249 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844. 238 # letters or digits. Total number of possibilities is (26*2 + 10)^2 = 3844.
250 abc_len = len(self._DIR_ALPHABET) 239 abc_len = len(self._DIR_ALPHABET)
251 tried = set() 240 tried = set()
252 while len(tried) < 1000: 241 while len(tried) < 1000:
253 i = random.randint(0, abc_len * abc_len - 1) 242 i = random.randint(0, abc_len * abc_len - 1)
254 rel_path = ( 243 rel_path = (
255 self._DIR_ALPHABET[i / abc_len] + 244 self._DIR_ALPHABET[i / abc_len] +
256 self._DIR_ALPHABET[i % abc_len]) 245 self._DIR_ALPHABET[i % abc_len])
257 if rel_path in tried: 246 if rel_path in tried:
258 continue 247 continue
259 abs_path = os.path.join(self.root_dir, rel_path) 248 abs_path = os.path.join(self.root_dir, rel_path)
260 if not fs.exists(abs_path): 249 if not fs.exists(abs_path):
261 return rel_path 250 return rel_path
262 tried.add(rel_path) 251 tried.add(rel_path)
263 raise Error('could not allocate a new cache dir, too many cache dirs') 252 raise Error('could not allocate a new cache dir, too many cache dirs')
264 253
265 def _remove(self, name):
266 """Removes a cache directory and entry.
267
268 NamedCache must be open.
269
270 Returns:
271 Number of caches deleted.
272 """
273 self._lock.assert_locked()
274 rel_path = self._lru.get(name)
275 if not rel_path:
276 return
277
278 named_dir = self._get_named_path(name)
279 if fs.islink(named_dir):
280 fs.unlink(named_dir)
281
282 abs_path = os.path.join(self.root_dir, rel_path)
283 if os.path.isdir(abs_path):
284 file_path.rmtree(abs_path)
285 self._lru.pop(name)
286
287 def _get_named_path(self, name): 254 def _get_named_path(self, name):
288 return os.path.join(self.root_dir, 'named', name) 255 return os.path.join(self.root_dir, 'named', name)
289 256
290 257
291 def add_named_cache_options(parser): 258 def add_named_cache_options(parser):
292 group = optparse.OptionGroup(parser, 'Named caches') 259 group = optparse.OptionGroup(parser, 'Named caches')
293 group.add_option( 260 group.add_option(
294 '--named-cache', 261 '--named-cache',
295 dest='named_caches', 262 dest='named_caches',
296 action='append', 263 action='append',
297 nargs=2, 264 nargs=2,
298 default=[], 265 default=[],
299 help='A named cache to request. Accepts two arguments, name and path. ' 266 help='A named cache to request. Accepts two arguments, name and path. '
300 'name identifies the cache, must match regex [a-z0-9_]{1,4096}. ' 267 'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
301 'path is a path relative to the run dir where the cache directory ' 268 'path is a path relative to the run dir where the cache directory '
302 'must be put to. ' 269 'must be symlinked to. '
303 'This option can be specified more than once.') 270 'This option can be specified more than once.')
304 group.add_option( 271 group.add_option(
305 '--named-cache-root', 272 '--named-cache-root',
306 help='Cache root directory. Default=%default') 273 help='Cache root directory. Default=%default')
307 parser.add_option_group(group) 274 parser.add_option_group(group)
308 275
309 276
310 def process_named_cache_options(parser, options): 277 def process_named_cache_options(parser, options):
311 """Validates named cache options and returns a CacheManager.""" 278 """Validates named cache options and returns a CacheManager."""
312 if options.named_caches and not options.named_cache_root: 279 if options.named_caches and not options.named_cache_root:
313 parser.error('--named-cache is specified, but --named-cache-root is empty') 280 parser.error('--named-cache is specified, but --named-cache-root is empty')
314 for name, path in options.named_caches: 281 for name, path in options.named_caches:
315 if not CACHE_NAME_RE.match(name): 282 if not CACHE_NAME_RE.match(name):
316 parser.error( 283 parser.error(
317 'cache name %r does not match %r' % (name, CACHE_NAME_RE.pattern)) 284 'cache name "%s" does not match %s' % (name, CACHE_NAME_RE.pattern))
318 if not path: 285 if not path:
319 parser.error('cache path cannot be empty') 286 parser.error('cache path cannot be empty')
320 if options.named_cache_root: 287 if options.named_cache_root:
321 return CacheManager(unicode(os.path.abspath(options.named_cache_root))) 288 return CacheManager(os.path.abspath(options.named_cache_root))
322 return None 289 return None
323 290
324 291
325 def _check_abs(path): 292 def _validate_named_cache_path(path):
326 if not isinstance(path, unicode): 293 if os.path.isabs(path):
327 raise Error('named cache installation path must be unicode') 294 raise Error('named cache path must not be absolute')
328 if not os.path.isabs(path): 295 if '..' in path.split(os.path.sep):
329 raise Error('named cache installation path must be absolute') 296 raise Error('named cache path must not contain ".."')
OLDNEW
« no previous file with comments | « no previous file | client/run_isolated.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698