| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Reads a manifest, creates a tree of hardlinks and runs the test. | |
| 7 | |
| 8 Keeps a local cache. | |
| 9 """ | |
| 10 | |
| 11 import ctypes | |
| 12 import hashlib | |
| 13 import json | |
| 14 import logging | |
| 15 import optparse | |
| 16 import os | |
| 17 import Queue | |
| 18 import re | |
| 19 import shutil | |
| 20 import stat | |
| 21 import subprocess | |
| 22 import sys | |
| 23 import tempfile | |
| 24 import threading | |
| 25 import time | |
| 26 import urllib | |
| 27 | |
| 28 | |
# Types of action accepted by recreate_tree().
HARDLINK, SYMLINK, COPY = range(1, 4)

# Matches a full 40-character hexadecimal sha-1 digest, any letter case.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')
| 33 | |
| 34 | |
class ConfigError(ValueError):
  """Generic failure to load a manifest."""
  pass
| 38 | |
| 39 | |
class MappingError(OSError):
  """Failed to recreate the tree."""
  pass
| 43 | |
| 44 | |
def get_flavor():
  """Returns the system default flavor. Copied from gyp/pylib/gyp/common.py."""
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  # Everything else is reported as linux.
  return 'linux'
| 56 | |
| 57 | |
def os_link(source, link_name):
  """Creates a hardlink named |link_name| to |source|.

  Adds support for os.link() on Windows via the kernel32 API.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  # CreateHardLinkW() returns 0 on failure.
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
| 66 | |
| 67 | |
def readable_copy(outfile, infile):
  """Makes a copy of the file that is readable by everyone."""
  shutil.copy(infile, outfile)
  # Add the three read bits on top of whatever mode the copy ended up with.
  read_bits = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  os.chmod(outfile, os.stat(outfile).st_mode | read_bits)
| 74 | |
| 75 | |
def link_file(outfile, infile, action):
  """Links a file. The type of link depends on |action|.

  Arguments:
  - outfile: path of the link to create; must not already exist.
  - infile: path of the existing file to link to.
  - action: one of HARDLINK, SYMLINK or COPY.

  Raises:
    ValueError if |action| is not one of the supported actions.
    MappingError if |infile| is missing or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s', infile, outfile)
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    raise MappingError(
        '%s already exists; insize:%d; outsize:%d' %
        (outfile, os.stat(infile).st_size, os.stat(outfile).st_size))

  if action == COPY:
    readable_copy(outfile, infile)
  elif action == SYMLINK and sys.platform != 'win32':
    # On windows, symlink are converted to hardlink and fails over to copy.
    os.symlink(infile, outfile)
  else:
    try:
      os_link(infile, outfile)
    except OSError:
      # Probably a different file system; a copy is the safe fallback.
      logging.warn(
          'Failed to hardlink, falling back to copy %s to %s',
          infile, outfile)
      readable_copy(outfile, infile)
| 102 | |
| 103 | |
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  When |read_only| is True, keeps only the owner's read and execute bits
  (mode & 0500), stripping all write permissions; otherwise adds back the
  owner write bit (mode | 0200). Symlinks are only modified on platforms
  that support lchmod().
  """
  # lstat() so a symlink's own mode is inspected, not its target's.
  mode = os.lstat(path).st_mode
  if read_only:
    mode = mode & 0500
  else:
    mode = mode | 0200
  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
  else:
    if stat.S_ISLNK(mode):
      # Skip symlink without lchmod() support.
      logging.debug('Can\'t change +w bit on symlink %s' % path)
      return

    # TODO(maruel): Implement proper DACL modification on Windows.
    os.chmod(path, mode)
| 121 | |
| 122 | |
| 123 def make_writable(root, read_only): | |
| 124 """Toggle the writable bit on a directory tree.""" | |
| 125 root = os.path.abspath(root) | |
| 126 for dirpath, dirnames, filenames in os.walk(root, topdown=True): | |
| 127 for filename in filenames: | |
| 128 _set_write_bit(os.path.join(dirpath, filename), read_only) | |
| 129 | |
| 130 for dirname in dirnames: | |
| 131 _set_write_bit(os.path.join(dirpath, dirname), read_only) | |
| 132 | |
| 133 | |
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  On Windows a child process outliving the test can keep a file open, which
  makes deletion fail transiently; retry up to 3 times with an increasing
  delay. NOTE(review): if the third attempt also fails, the WindowsError is
  swallowed and the tree is left behind — confirm this is intended.
  """
  # Clear read-only bits first, otherwise shutil.rmtree() fails on them.
  make_writable(root, False)
  if sys.platform == 'win32':
    for i in range(3):
      try:
        shutil.rmtree(root)
        break
      except WindowsError:  # pylint: disable=E0602
        delay = (i+1)*2
        print >> sys.stderr, (
            'The test has subprocess outliving it. Sleep %d seconds.' % delay)
        time.sleep(delay)
  else:
    shutil.rmtree(root)
| 149 | |
| 150 | |
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter
    # could be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    if path1[0].lower() != path2[0].lower():
      return False
  return os.stat(path1).st_dev == os.stat(path2).st_dev
| 167 | |
| 168 | |
def get_free_space(path):
  """Returns the number of free bytes on the partition containing |path|."""
  if sys.platform != 'win32':
    stats = os.statvfs(path)
    return stats.f_bfree * stats.f_frsize
  # GetDiskFreeSpaceExW() reports the value through an output parameter.
  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
| 178 | |
| 179 | |
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir."""
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    # The default temp location works; hardlinks into root_dir are possible.
    base_temp_dir = None
  else:
    # Create it next to root_dir instead so it shares the filesystem.
    base_temp_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=base_temp_dir)
| 186 | |
| 187 | |
def load_manifest(content):
  """Verifies the manifest is valid and loads this object with the json data.

  Arguments:
  - content: raw manifest serialized as a json string.

  Returns the decoded manifest as a dict.

  Raises ConfigError if the content is not valid json or does not conform to
  the expected schema.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      _validate_command(value)
    elif key == 'files':
      _validate_files(value)
    elif key == 'includes':
      _validate_includes(value)
    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)
    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
    elif key == 'os':
      # The manifest must have been generated for this OS flavor.
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))
    else:
      raise ConfigError('Unknown key %s' % key)

  return data


def _validate_command(value):
  """Validates the 'command' key: a list of strings."""
  if not isinstance(value, list):
    raise ConfigError('Expected list, got %r' % value)
  for subvalue in value:
    if not isinstance(subvalue, basestring):
      raise ConfigError('Expected string, got %r' % subvalue)


def _validate_files(value):
  """Validates the 'files' key: a dict mapping path -> properties dict."""
  if not isinstance(value, dict):
    raise ConfigError('Expected dict, got %r' % value)
  for subkey, subvalue in value.iteritems():
    if not isinstance(subkey, basestring):
      raise ConfigError('Expected string, got %r' % subkey)
    if not isinstance(subvalue, dict):
      raise ConfigError('Expected dict, got %r' % subvalue)
    _validate_file_properties(subvalue)


def _validate_file_properties(subvalue):
  """Validates the properties dict of one 'files' entry."""
  for subsubkey, subsubvalue in subvalue.iteritems():
    if subsubkey == 'link':
      if not isinstance(subsubvalue, basestring):
        raise ConfigError('Expected string, got %r' % subsubvalue)
    elif subsubkey == 'mode':
      if not isinstance(subsubvalue, int):
        raise ConfigError('Expected int, got %r' % subsubvalue)
    elif subsubkey == 'sha-1':
      if not RE_IS_SHA1.match(subsubvalue):
        raise ConfigError('Expected sha-1, got %r' % subsubvalue)
    elif subsubkey == 'size':
      if not isinstance(subsubvalue, int):
        raise ConfigError('Expected int, got %r' % subsubvalue)
    elif subsubkey == 'timestamp':
      if not isinstance(subsubvalue, int):
        raise ConfigError('Expected int, got %r' % subsubvalue)
    elif subsubkey == 'touched_only':
      if not isinstance(subsubvalue, bool):
        raise ConfigError('Expected bool, got %r' % subsubvalue)
    else:
      raise ConfigError('Unknown subsubkey %s' % subsubkey)
  # An entry is either content-addressed or a symlink, never both.
  if bool('sha-1' in subvalue) and bool('link' in subvalue):
    raise ConfigError(
        'Did not expect both \'sha-1\' and \'link\', got: %r' % subvalue)


def _validate_includes(value):
  """Validates the 'includes' key: a list of sha-1 digests."""
  if not isinstance(value, list):
    raise ConfigError('Expected list, got %r' % value)
  for subvalue in value:
    if not RE_IS_SHA1.match(subvalue):
      raise ConfigError('Expected sha-1, got %r' % subvalue)
| 265 | |
| 266 | |
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable."""
  fixed = list(cmd)
  if fixed[0] == 'python':
    # Replace the bare 'python' token with the current interpreter.
    fixed[0] = sys.executable
  elif fixed[0].endswith('.py'):
    # Prepend the interpreter in front of a script invocation.
    fixed = [sys.executable] + fixed
  return fixed
| 275 | |
| 276 | |
class Profiler(object):
  """Context manager that logs the wall-clock duration of a code section."""

  def __init__(self, name):
    self.name = name
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info('Profiling: Section %s took %3.3f seconds',
                 self.name, elapsed)
| 290 | |
| 291 | |
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equals, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. Lower values are dequeued first by the PriorityQueue. The low
  # INTERNAL_PRIORITY_BITS bits are reserved to count retries (see _run()).
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, index, obj, destination).
    self._queue = Queue.PriorityQueue()
    # Contains tuple(priority, index, obj). A priority of -1 marks an
    # exception tuple instead of a result (see get_result()).
    self._done = Queue.PriorityQueue()

    # Contains generated exceptions that haven't been handled yet.
    self._exceptions = Queue.Queue()

    # To keep FIFO ordering in self._queue. It is assumed xrange's iterator is
    # thread-safe.
    self._next_index = xrange(0, 1<<30).__iter__().next

    # Control access to the following member.
    self._ready_lock = threading.Lock()
    # Number of threads in wait state.
    self._ready = 0

    # Control access to the following member.
    self._workers_lock = threading.Lock()
    self._workers = []
    for _ in range(self.INITIAL_WORKERS):
      self._add_worker()

  def join(self):
    """Blocks until the queue is empty."""
    self._queue.join()

  def next_exception(self):
    """Returns the next unhandled exception, or None if there is
    no exception."""
    try:
      return self._exceptions.get_nowait()
    except Queue.Empty:
      return None

  def add_item(self, priority, obj, dest):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first.

    Thread-safe.
    """
    # The low bits must be clear; they are used internally to count retries.
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    self._add_to_queue(priority, obj, dest)

  def get_result(self):
    """Returns the next file that was successfully fetched.

    Re-raises any exception transferred back by a worker thread.
    """
    r = self._done.get()
    if r[0] == -1:
      # It's an exception; re-raise it with its original traceback.
      raise r[2][0], r[2][1], r[2][2]
    return r[2]

  def _add_to_queue(self, priority, obj, dest):
    # Enqueues and spawns an extra worker when no thread is currently idle.
    with self._ready_lock:
      start_new_worker = not self._ready
    self._queue.put((priority, self._next_index(), obj, dest))
    if start_new_worker:
      self._add_worker()

  def _add_worker(self):
    """Add one worker thread if there isn't too many. Thread-safe."""
    with self._workers_lock:
      if len(self._workers) >= self.MAX_WORKERS:
        return False
      worker = threading.Thread(target=self._run)
      self._workers.append(worker)
    # Daemon threads so a hung transfer does not block process exit.
    worker.daemon = True
    worker.start()

  def _step_done(self, result):
    """Worker helper function"""
    self._done.put(result)
    self._queue.task_done()
    if result[0] == -1:
      self._exceptions.put(sys.exc_info())

  def _run(self):
    """Worker thread loop."""
    while True:
      try:
        with self._ready_lock:
          self._ready += 1
        item = self._queue.get()
      finally:
        with self._ready_lock:
          self._ready -= 1
      # NOTE(review): nothing visibly enqueues a falsy item; presumably a
      # poison-pill shutdown hook — confirm before relying on it.
      if not item:
        return
      priority, index, obj, dest = item
      try:
        self._do_item(obj, dest)
      except IOError:
        # Retry a few times, lowering the priority.
        if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
          self._add_to_queue(priority + 1, obj, dest)
          self._queue.task_done()
          continue
        # Transfers the exception back. It has maximum priority.
        self._step_done((-1, 0, sys.exc_info()))
      except:
        # Transfers the exception back. It has maximum priority.
        self._step_done((-1, 0, sys.exc_info()))
      else:
        self._step_done((priority, index, obj))

  @staticmethod
  def get_file_handler(file_or_url):
    """Returns an object to retrieve objects from a remote."""
    if re.match(r'^https?://.+$', file_or_url):
      # HTTP remote: items are fetched with urlretrieve().
      file_or_url = file_or_url.rstrip('/') + '/'
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        source = file_or_url + item
        logging.debug('download_file(%s, %s)', source, dest)
        urllib.urlretrieve(source, dest)
      return download_file

    # Local path or CIFS share: items are plain file copies.
    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
| 437 | |
| 438 | |
class CachePolicies(object):
  """Plain value object holding the cache retention policy knobs."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value.
      If 0, it unconditionally fills the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
      enforce a limit.
    """
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
| 453 | |
| 454 | |
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    # A lookup map to speed up searching.
    self._lookup = {}
    # NOTE(review): _dirty is set on mutations but save() currently writes
    # the state file unconditionally — the flag is never consulted.
    self._dirty = False

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    if not os.path.isdir(self.cache_dir):
      os.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        self.state = json.load(open(self.state_file, 'r'))
      except (IOError, ValueError), e:
        # Too bad. The file will be overwritten and the cache cleared.
        logging.error(
            'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
      # Validate the schema: a list of 2-element (filename, size) pairs.
      if (not isinstance(self.state, list) or
          not all(
            isinstance(i, (list, tuple)) and len(i) == 2 for i in self.state)):
        # Discard.
        self.state = []
        self._dirty = True

    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(filename for filename, _ in self.state)
    if len(previous) != len(self.state):
      # Duplicate entries mean the file cannot be trusted; start over.
      logging.warn('Cache state is corrupted')
      self._dirty = True
      self.state = []
    else:
      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        self._dirty = True
        if not RE_IS_SHA1.match(filename):
          logging.warn('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
        else:
          # Insert as the oldest file. It will be deleted eventually if not
          # accessed.
          self._add(filename, False)
          added += 1
      if added:
        logging.warn('Added back %d unknown files', added)
      # Whatever remains in |previous| no longer exists on disk; drop it.
      self.state = [
        (filename, size) for filename, size in self.state
        if filename not in previous
      ]
      self._update_lookup()

    with Profiler('SetupTrimming'):
      self.trim()

  def __enter__(self):
    """Supports 'with Cache(...) as cache:' usage."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache and logs add/remove statistics on exit."""
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%4d (%7dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%4d (%7dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%4d (%7dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('%7dkb free', self._free_disk / 1024)

  def remove_lru_file(self):
    """Removes the last recently used file.

    NOTE(review): the entry is popped from self.state before os.remove(); on
    OSError the state no longer tracks a file that may still exist on disk.
    """
    try:
      filename, size = self.state.pop(0)
      del self._lookup[filename]
      self._removed.append(size)
      os.remove(self.path(filename))
      self._dirty = True
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      while sum(i[1] for i in self.state) > self.policies.max_cache_size:
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.
    """
    assert not '/' in item
    path = self.path(item)
    index = self._lookup.get(item)
    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path)
      self._pending_queue.add(item)
    else:
      if index != len(self.state) - 1:
        # Was already in cache. Update it's LRU value by putting it at the end.
        self.state.append(self.state.pop(index))
        self._dirty = True
        self._update_lookup()

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    if not obj in self._lookup:
      # Hardlink the file into the cache directory under its hash name.
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering.

    NOTE(review): the state file is opened in 'wb' mode and never explicitly
    closed; under CPython 2 the handle is reclaimed immediately, but confirm
    this is acceptable.
    """
    json.dump(self.state, open(self.state_file, 'wb'), separators=(',',':'))

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self.state.insert(0, (item, size))
    self._dirty = True

  def _update_lookup(self):
    """Rebuilds the filename -> state-index map from self.state."""
    self._lookup = dict(
        (filename, index) for index, (filename, _) in enumerate(self.state))
| 666 | |
| 667 | |
| 668 | |
class Manifest(object):
  """Represents a single parsed manifest, e.g. a .results file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('Manifest(%s)' % obj_hash)
    self.obj_hash = obj_hash
    # Flipped once all the left-side of the tree is parsed. 'Tree' means this
    # manifest plus every manifest recursively pulled in through its
    # 'includes' key. The ordering of the sha-1s in 'includes' matters: later
    # entries are not processed until the earlier ones are retrieved and read.
    self.can_fetch = False

    # Decoded json content.
    self.data = {}
    # One child Manifest instance per entry in self.data['includes'].
    self.children = []

    # True once load() has successfully parsed the content.
    self._manifest_parsed = False
    # True once fetch_files() has finished requesting this manifest's files.
    self.files_fetched = False

  def load(self, content):
    """Verifies the manifest is valid and loads this object with the json data.
    """
    logging.debug('Manifest.load(%s)' % self.obj_hash)
    assert not self._manifest_parsed
    self.data = load_manifest(content)
    self.children = [
        Manifest(child_hash) for child_hash in self.data.get('includes', [])
    ]
    self._manifest_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this manifest not present in files dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._manifest_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root manifest has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'sha-1' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        cache.retrieve(Remote.MED, properties['sha-1'])
    self.files_fetched = True
| 721 | |
| 722 | |
class Settings(object):
  """Results of a completely parsed manifest."""
  def __init__(self):
    # Command line to run, as a list of arguments.
    self.command = []
    # Merged file mapping: path -> properties dict, root manifest wins.
    self.files = {}
    # Tri-state until load(): None means 'not specified by any manifest'.
    self.read_only = None
    self.relative_cwd = None
    # The main manifest.
    self.root = None
    logging.debug('Settings')

  def load(self, cache, root_manifest_hash):
    """Loads the manifest and all the included manifests asynchronously.

    It enables support for included manifest. They are processed in strict order
    but fetched asynchronously from the cache. This is important so that a file
    in an included manifest that is overridden by an embedding manifest is not
    fetched neededlessly. The includes are fetched in one pass and the files are
    fetched as soon as all the manifests on the left-side of the tree were
    fetched.

    The prioritization is very important here for nested manifests. 'includes'
    have the highest priority and the algorithm is optimized for both deep and
    wide manifests. A deep one is a long link of manifest referenced one at a
    time by one item in 'includes'. A wide one has a large number of 'includes'
    in a single manifest. 'left' is defined as an included manifest earlier in
    the 'includes' list. So the order of the elements in 'includes' is
    important.
    """
    self.root = Manifest(root_manifest_hash)
    cache.retrieve(Remote.HIGH, root_manifest_hash)
    pending = {root_manifest_hash: self.root}
    # Keeps the list of retrieved items to refuse recursive includes.
    # NOTE(review): nothing is ever appended to |retrieved| after this point,
    # so only recursion back to the root manifest is detected — confirm.
    retrieved = [root_manifest_hash]

    def update_self(node):
      # Requests this node's files and folds its properties into self; the
      # first manifest to define a property wins.
      node.fetch_files(cache, self.files)
      # Grabs properties.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      # Depth-first walk that unlocks at most one not-yet-fetchable child per
      # parent, enforcing the strict left-to-right 'includes' ordering.
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      # Block until any pending manifest arrives, then parse it.
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      item.load(open(cache.path(item_hash), 'r').read())
      if item_hash == root_manifest_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('Manifest %s is retrieved recursively' % h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    # Normalize the tri-state properties to their defaults.
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
| 805 | |
| 806 | |
def run_tha_test(manifest_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
  - manifest_hash: sha-1 of the manifest to run, or a path to a local manifest
    file (anything not matching RE_IS_SHA1 is treated as a path).
  - cache_dir: directory holding the local content-addressed cache.
  - remote: location passed to Remote() to fetch missing objects from.
  - policies: CachePolicies controlling cache trimming.

  Returns the exit code of the executed command, or 1 when the manifest
  contains no command. Raises ConfigError/MappingError on bad input and
  OSError if the command cannot be started.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetManifests') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(manifest_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Read in binary mode so the sha-1 matches the file's exact on-disk
          # bytes (text mode would mangle line endings on Windows and produce
          # a hash inconsistent with the cached content), and close the handle
          # promptly instead of leaking it.
          with open(manifest_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(manifest_hash, h)
          manifest_hash = h
        settings.load(cache, manifest_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        logging.debug('Creating directories')
        # Creates the tree of directories to create. Walk each file's dirname
        # up to the root so every intermediate directory is included.
        directories = set(os.path.dirname(f) for f in settings.files)
        for item in list(directories):
          while item:
            directories.add(item)
            item = os.path.dirname(item)
        # sorted() guarantees parents are created before their children.
        for d in sorted(directories):
          if d:
            os.mkdir(os.path.join(outdir, d))

        # Creates the links if necessary.
        for filepath, properties in settings.files.iteritems():
          if 'link' not in properties:
            continue
          outfile = os.path.join(outdir, filepath)
          os.symlink(properties['link'], outfile)
          if 'mode' in properties:
            # It's not set on Windows.
            os.chmod(outfile, properties['mode'])

        # Remaining files to be processed, keyed by sha-1 so that several
        # paths sharing the same content are mapped from one download.
        # Note that files could still be not be downloaded yet here.
        remaining = dict()
        for filepath, props in settings.files.iteritems():
          if 'sha-1' in props:
            remaining.setdefault(props['sha-1'], []).append((filepath, props))

        # Do bookkeeping while files are being downloaded in the background.
        cwd = os.path.join(outdir, settings.relative_cwd)
        if not os.path.isdir(cwd):
          os.makedirs(cwd)
        cmd = settings.command[:]
        # Ensure paths are correctly separated on windows.
        cmd[0] = cmd[0].replace('/', os.path.sep)
        cmd = fix_python_path(cmd)

        # Now block on the remaining files to be downloaded and mapped.
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'mode' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['mode'])

      if settings.read_only:
        # NOTE(review): despite the name, the second argument presumably flips
        # the tree to read-only here — confirm against make_writable().
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      # Always clean up the temporary tree, even when the test failed to run.
      rmtree(outdir)
| 889 | |
| 890 | |
| 891 def main(): | |
| 892 parser = optparse.OptionParser( | |
| 893 usage='%prog <options>', description=sys.modules[__name__].__doc__) | |
| 894 parser.add_option( | |
| 895 '-v', '--verbose', action='count', default=0, help='Use multiple times') | |
| 896 parser.add_option('--no-run', action='store_true', help='Skip the run part') | |
| 897 | |
| 898 group = optparse.OptionGroup(parser, 'Data source') | |
| 899 group.add_option( | |
| 900 '-m', '--manifest', | |
| 901 metavar='FILE', | |
| 902 help='File/url describing what to map or run') | |
| 903 group.add_option( | |
| 904 '-H', '--hash', | |
| 905 help='Hash of the manifest to grab from the hash table') | |
| 906 parser.add_option_group(group) | |
| 907 | |
| 908 group.add_option( | |
| 909 '-r', '--remote', metavar='URL', help='Remote where to get the items') | |
| 910 group = optparse.OptionGroup(parser, 'Cache management') | |
| 911 group.add_option( | |
| 912 '--cache', | |
| 913 default='cache', | |
| 914 metavar='DIR', | |
| 915 help='Cache directory, default=%default') | |
| 916 group.add_option( | |
| 917 '--max-cache-size', | |
| 918 type='int', | |
| 919 metavar='NNN', | |
| 920 default=20*1024*1024*1024, | |
| 921 help='Trim if the cache gets larger than this value, default=%default') | |
| 922 group.add_option( | |
| 923 '--min-free-space', | |
| 924 type='int', | |
| 925 metavar='NNN', | |
| 926 default=1*1024*1024*1024, | |
| 927 help='Trim if disk free space becomes lower than this value, ' | |
| 928 'default=%default') | |
| 929 group.add_option( | |
| 930 '--max-items', | |
| 931 type='int', | |
| 932 metavar='NNN', | |
| 933 default=100000, | |
| 934 help='Trim if more than this number of items are in the cache ' | |
| 935 'default=%default') | |
| 936 parser.add_option_group(group) | |
| 937 | |
| 938 options, args = parser.parse_args() | |
| 939 level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)] | |
| 940 logging.basicConfig( | |
| 941 level=level, | |
| 942 format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s') | |
| 943 | |
| 944 if bool(options.manifest) == bool(options.hash): | |
| 945 parser.error('One and only one of --manifest or --hash is required.') | |
| 946 if not options.remote: | |
| 947 parser.error('--remote is required.') | |
| 948 if args: | |
| 949 parser.error('Unsupported args %s' % ' '.join(args)) | |
| 950 | |
| 951 policies = CachePolicies( | |
| 952 options.max_cache_size, options.min_free_space, options.max_items) | |
| 953 try: | |
| 954 return run_tha_test( | |
| 955 options.manifest or options.hash, | |
| 956 os.path.abspath(options.cache), | |
| 957 options.remote, | |
| 958 policies) | |
| 959 except (ConfigError, MappingError), e: | |
| 960 print >> sys.stderr, str(e) | |
| 961 return 1 | |
| 962 | |
| 963 | |
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == '__main__':
  sys.exit(main())
| OLD | NEW |