Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2013 The LUCI Authors. All rights reserved. | 2 # Copyright 2013 The LUCI Authors. All rights reserved. |
| 3 # Use of this source code is governed under the Apache License, Version 2.0 | 3 # Use of this source code is governed under the Apache License, Version 2.0 |
| 4 # that can be found in the LICENSE file. | 4 # that can be found in the LICENSE file. |
| 5 | 5 |
| 6 """Archives a set of files or directories to an Isolate Server.""" | 6 """Archives a set of files or directories to an Isolate Server.""" |
| 7 | 7 |
| 8 __version__ = '0.6.0' | 8 __version__ = '0.7.0' |
| 9 | 9 |
| 10 import base64 | 10 import base64 |
| 11 import collections | |
| 11 import errno | 12 import errno |
| 12 import functools | 13 import functools |
| 14 import json | |
| 13 import io | 15 import io |
| 14 import logging | 16 import logging |
| 15 import optparse | 17 import optparse |
| 16 import os | 18 import os |
| 17 import re | 19 import re |
| 18 import signal | 20 import signal |
| 19 import stat | 21 import stat |
| 20 import sys | 22 import sys |
| 21 import tempfile | 23 import tempfile |
| 22 import threading | 24 import threading |
| 23 import time | 25 import time |
| 24 import types | 26 import types |
| 25 import zlib | 27 import zlib |
| 26 | 28 |
| 27 from third_party import colorama | 29 from third_party import colorama |
| 28 from third_party.depot_tools import fix_encoding | 30 from third_party.depot_tools import fix_encoding |
| 29 from third_party.depot_tools import subcommand | 31 from third_party.depot_tools import subcommand |
| 30 | 32 |
| 31 from libs import arfile | 33 from libs import arfile |
| 32 from utils import file_path | 34 from utils import file_path |
| 33 from utils import fs | 35 from utils import fs |
| 34 from utils import logging_utils | 36 from utils import logging_utils |
| 35 from utils import lru | |
| 36 from utils import net | 37 from utils import net |
| 37 from utils import on_error | 38 from utils import on_error |
| 38 from utils import subprocess42 | 39 from utils import subprocess42 |
| 39 from utils import threading_utils | 40 from utils import threading_utils |
| 40 from utils import tools | 41 from utils import tools |
| 41 | 42 |
| 42 import auth | 43 import auth |
| 43 import isolated_format | 44 import isolated_format |
| 44 | 45 |
| 45 | 46 |
| (...skipping 1428 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1474 0, it unconditionally fills the disk. | 1475 0, it unconditionally fills the disk. |
| 1475 - max_items: Maximum number of items to keep in the cache. If 0, do not | 1476 - max_items: Maximum number of items to keep in the cache. If 0, do not |
| 1476 enforce a limit. | 1477 enforce a limit. |
| 1477 """ | 1478 """ |
| 1478 self.max_cache_size = max_cache_size | 1479 self.max_cache_size = max_cache_size |
| 1479 self.min_free_space = min_free_space | 1480 self.min_free_space = min_free_space |
| 1480 self.max_items = max_items | 1481 self.max_items = max_items |
| 1481 | 1482 |
| 1482 | 1483 |
| 1483 class DiskCache(LocalCache): | 1484 class DiskCache(LocalCache): |
| 1484 """Stateful LRU cache in a flat hash table in a directory. | 1485 """Stateful LRU cache in a semi-flat hash table in a directory. |
| 1485 | 1486 |
| 1486 Saves its state as json file. | 1487 Saves its state as json file. |
| 1487 """ | 1488 """ |
| 1488 STATE_FILE = u'state.json' | 1489 STATE_FILE = u'state.json' |
| 1489 | 1490 |
| 1490 def __init__(self, cache_dir, policies, hash_algo): | 1491 # All protected methods (starting with '_') except _path should be called |
| 1492 # with self._lock held. | |
| 1493 | |
| 1494 def __init__(self, cache_dir, policies, hash_algo, time_fn=None): | |
| 1491 """ | 1495 """ |
| 1492 Arguments: | 1496 Arguments: |
| 1493 cache_dir: directory where to place the cache. | 1497 cache_dir: directory where to place the cache. |
| 1494 policies: cache retention policies. | 1498 policies: cache retention policies. |
| 1495 algo: hashing algorithm used. | 1499 algo: hashing algorithm used. |
| 1500 time_fn: function to take current timestamp when adding new items. | |
| 1501 Defaults to time.time. | |
| 1496 """ | 1502 """ |
| 1497 # All protected methods (starting with '_') except _path should be called | |
| 1498 # with self._lock held. | |
| 1499 super(DiskCache, self).__init__() | 1503 super(DiskCache, self).__init__() |
| 1504 self.time_fn = time_fn or time.time | |
| 1500 self.cache_dir = cache_dir | 1505 self.cache_dir = cache_dir |
| 1501 self.policies = policies | 1506 self.policies = policies |
| 1502 self.hash_algo = hash_algo | 1507 self.hash_algo = hash_algo |
| 1503 self.state_file = os.path.join(cache_dir, self.STATE_FILE) | 1508 self.state_file = os.path.join(cache_dir, self.STATE_FILE) |
| 1504 # Items in a LRU lookup dict(digest: size). | 1509 # Items in a LRU lookup dict(digest: [size, timestamp]). |
| 1505 self._lru = lru.LRUDict() | 1510 # We use lists instead of tuples because JSON arrays are parsed to lists. |
| 1511 # Mutations must be followed by `self._dirty = True`. | |
| 1512 self._lru = collections.OrderedDict() | |
| 1513 self._dirty = False # if True, self._lru was modified since loading/saving. | |
| 1506 # Current cached free disk space. It is updated by self._trim(). | 1514 # Current cached free disk space. It is updated by self._trim(). |
| 1507 self._free_disk = 0 | 1515 self._free_disk = 0 |
| 1508 # The first item in the LRU cache that must not be evicted during this run | 1516 # The first item in the LRU cache that must not be evicted during this run |
| 1509 # since it was referenced. All items more recent than _protected in the LRU | 1517 # since it was referenced. All items more recent than _protected in the LRU |
| 1510 # cache are also inherently protected. It could be a set() of all items | 1518 # cache are also inherently protected. It could be a set() of all items |
| 1511 # referenced but this increases memory usage without a use case. | 1519 # referenced but this increases memory usage without a use case. |
| 1512 self._protected = None | 1520 self._protected = None |
| 1513 # Cleanup operations done by self._load(), if any. | 1521 # Cleanup operations done by self._load(), if any. |
| 1514 self._operations = [] | 1522 self._operations = [] |
| 1523 | |
| 1524 self._free_disk = file_path.get_free_space(self.cache_dir) | |
| 1525 | |
| 1515 with tools.Profiler('Setup'): | 1526 with tools.Profiler('Setup'): |
| 1516 with self._lock: | 1527 with self._lock: |
| 1517 # self._load() calls self._trim() which initializes self._free_disk. | |
| 1518 self._load() | 1528 self._load() |
| 1519 | 1529 |
| 1520 def __contains__(self, digest): | 1530 def __contains__(self, digest): |
| 1521 with self._lock: | 1531 with self._lock: |
| 1522 return digest in self._lru | 1532 return digest in self._lru |
| 1523 | 1533 |
| 1524 def __enter__(self): | 1534 def __enter__(self): |
| 1525 return self | 1535 return self |
| 1526 | 1536 |
| 1527 def __exit__(self, _exc_type, _exec_value, _traceback): | 1537 def __exit__(self, _exc_type, _exec_value, _traceback): |
| 1528 with tools.Profiler('CleanupTrimming'): | 1538 with tools.Profiler('CleanupTrimming'): |
| 1529 with self._lock: | 1539 with self._lock: |
| 1530 self._trim() | 1540 self._trim() |
| 1531 | 1541 |
| 1532 logging.info( | 1542 logging.info( |
| 1533 '%5d (%8dkb) added', | 1543 '%5d (%8dkb) added', |
| 1534 len(self._added), sum(self._added) / 1024) | 1544 len(self._added), sum(self._added) / 1024) |
| 1535 logging.info( | 1545 logging.info( |
| 1536 '%5d (%8dkb) current', | 1546 '%5d (%8dkb) current', |
| 1537 len(self._lru), | 1547 len(self._lru), |
| 1538 sum(self._lru.itervalues()) / 1024) | 1548 self._cache_disk_size() / 1024) |
| 1539 logging.info( | 1549 logging.info( |
| 1540 '%5d (%8dkb) evicted', | 1550 '%5d (%8dkb) evicted', |
| 1541 len(self._evicted), sum(self._evicted) / 1024) | 1551 len(self._evicted), sum(self._evicted) / 1024) |
| 1542 logging.info( | 1552 logging.info( |
| 1543 ' %8dkb free', | 1553 ' %8dkb free', |
| 1544 self._free_disk / 1024) | 1554 self._free_disk / 1024) |
| 1545 return False | 1555 return False |
| 1546 | 1556 |
| 1557 def _sizes(self): | |
| 1558 """Returns an iterator of pairs (digest, size).""" | |
| 1559 return ( | |
| 1560 (digest, size) | |
| 1561 for digest, (size, _) in self._lru.iteritems()) | |
| 1562 | |
| 1547 def cached_set(self): | 1563 def cached_set(self): |
| 1548 with self._lock: | 1564 with self._lock: |
| 1549 return self._lru.keys_set() | 1565 return set(self._lru) |
| 1550 | 1566 |
| 1551 def cleanup(self): | 1567 def cleanup(self): |
| 1552 """Cleans up the cache directory. | 1568 """Cleans up the cache directory. |
| 1553 | 1569 |
| 1554 Ensures there is no unknown files in cache_dir. | 1570 Ensures there is no unknown files in cache_dir. |
| 1555 Ensures the read-only bits are set correctly. | 1571 Ensures the read-only bits are set correctly. |
| 1556 | 1572 |
| 1557 At that point, the cache was already loaded, trimmed to respect cache | 1573 At that point, the cache was already loaded, trimmed to respect cache |
| 1558 policies. | 1574 policies. |
| 1559 """ | 1575 """ |
| 1560 fs.chmod(self.cache_dir, 0700) | 1576 def try_remove(filename): |
| 1561 # Ensure that all files listed in the state still exist and add new ones. | 1577 assert os.path.isabs(filename) |
| 1562 previous = self._lru.keys_set() | |
| 1563 # It'd be faster if there were a readdir() function. | |
| 1564 for filename in fs.listdir(self.cache_dir): | |
| 1565 if filename == self.STATE_FILE: | |
| 1566 fs.chmod(os.path.join(self.cache_dir, filename), 0600) | |
| 1567 continue | |
| 1568 if filename in previous: | |
| 1569 fs.chmod(os.path.join(self.cache_dir, filename), 0400) | |
| 1570 previous.remove(filename) | |
| 1571 continue | |
| 1572 | |
| 1573 # An untracked file. Delete it. | |
| 1574 logging.warning('Removing unknown file %s from cache', filename) | 1578 logging.warning('Removing unknown file %s from cache', filename) |
| 1575 p = self._path(filename) | 1579 if fs.isdir(filename): |
| 1576 if fs.isdir(p): | |
| 1577 try: | 1580 try: |
| 1578 file_path.rmtree(p) | 1581 file_path.rmtree(filename) |
| 1579 except OSError: | 1582 except OSError: |
| 1580 pass | 1583 pass |
| 1581 else: | 1584 else: |
| 1582 file_path.try_remove(p) | 1585 file_path.try_remove(filename) |
| 1583 continue | 1586 |
| 1587 fs.chmod(self.cache_dir, 0700) | |
| 1588 # Ensure that all files listed in the state still exist and add new ones. | |
| 1589 previous = set(self._lru) | |
|
M-A Ruel
2016/10/13 20:51:12
self.cached_set() ? just for the form. :)
| |
| 1590 # It'd be faster if there were a readdir() function. | |
| 1591 for filename_l1 in fs.listdir(self.cache_dir): | |
| 1592 full_name_l1 = os.path.join(self.cache_dir, filename_l1) | |
| 1593 if filename_l1 == self.STATE_FILE: | |
| 1594 fs.chmod(full_name_l1, 0600) | |
| 1595 continue | |
| 1596 if len(filename_l1) != 2: | |
| 1597 try_remove(full_name_l1) | |
| 1598 continue | |
| 1599 for filename_l2 in fs.listdir(full_name_l1): | |
| 1600 digest = filename_l1 + filename_l2 | |
| 1601 full_name_l2 = os.path.join(full_name_l1, filename_l2) | |
| 1602 if digest in previous: | |
| 1603 fs.chmod(full_name_l2, 0400) | |
| 1604 previous.remove(digest) | |
| 1605 else: | |
| 1606 try_remove(full_name_l2) | |
| 1584 | 1607 |
| 1585 if previous: | 1608 if previous: |
| 1586 # Filter out entries that were not found. | 1609 # Filter out entries that were not found. |
| 1587 logging.warning('Removed %d lost files', len(previous)) | 1610 logging.warning('Removed %d lost files', len(previous)) |
| 1588 for filename in previous: | 1611 for digest in previous: |
| 1589 self._lru.pop(filename) | 1612 self._lru.pop(digest) |
| 1613 self._dirty = True | |
| 1590 | 1614 |
| 1591 # What remains to be done is to hash every single item to | 1615 # What remains to be done is to hash every single item to |
| 1592 # detect corruption, then save to ensure state.json is up to date. | 1616 # detect corruption, then save to ensure state.json is up to date. |
| 1593 # Sadly, on a 50Gb cache with 100mib/s I/O, this is still over 8 minutes. | 1617 # Sadly, on a 50Gb cache with 100mib/s I/O, this is still over 8 minutes. |
| 1594 # TODO(maruel): Let's revisit once directory metadata is stored in | 1618 # TODO(maruel): Let's revisit once directory metadata is stored in |
| 1595 # state.json so only the files that had been mapped since the last cleanup() | 1619 # state.json so only the files that had been mapped since the last cleanup() |
| 1596 # call are manually verified. | 1620 # call are manually verified. |
| 1597 # | 1621 # |
| 1598 #with self._lock: | 1622 #with self._lock: |
| 1599 # for digest in self._lru: | 1623 # for digest in self._lru: |
| 1600 # if not isolated_format.is_valid_hash( | 1624 # if not isolated_format.is_valid_hash( |
| 1601 # self._path(digest), self.hash_algo): | 1625 # self._path(digest), self.hash_algo): |
| 1602 # self.evict(digest) | 1626 # self.evict(digest) |
| 1603 # logging.info('Deleted corrupted item: %s', digest) | 1627 # logging.info('Deleted corrupted item: %s', digest) |
| 1604 | 1628 |
| 1605 def touch(self, digest, size): | 1629 def touch(self, digest, size): |
| 1606 """Verifies an actual file is valid. | 1630 """Verifies an actual file is valid. |
| 1607 | 1631 |
| 1608 Note that it doesn't compute the hash so it could still be corrupted if the | 1632 Note that it doesn't compute the hash so it could still be corrupted if the |
| 1609 file size didn't change. | 1633 file size didn't change. |
| 1610 | 1634 |
| 1611 TODO(maruel): More stringent verification while keeping the check fast. | 1635 TODO(maruel): More stringent verification while keeping the check fast. |
| 1612 """ | 1636 """ |
| 1613 # Do the check outside the lock. | 1637 # Do the check outside the lock. |
| 1614 if not is_valid_file(self._path(digest), size): | 1638 if not is_valid_file(self._path(digest), size): |
| 1615 return False | 1639 return False |
| 1616 | 1640 |
| 1617 # Update its LRU position. | 1641 # Update its LRU position. |
| 1618 with self._lock: | 1642 with self._lock: |
| 1619 if digest not in self._lru: | 1643 if self._lru.pop(digest, None) is None: |
| 1620 return False | 1644 return False |
| 1621 self._lru.touch(digest) | 1645 self._lru[digest] = [size, self.time_fn()] |
| 1646 self._dirty = True | |
| 1622 self._protected = self._protected or digest | 1647 self._protected = self._protected or digest |
| 1623 return True | 1648 return True |
| 1624 | 1649 |
| 1625 def evict(self, digest): | 1650 def evict(self, digest): |
| 1626 with self._lock: | 1651 with self._lock: |
| 1627 # Do not check for 'digest == self._protected' since it could be because | 1652 # Do not check for 'digest == self._protected' since it could be because |
| 1628 # the object is corrupted. | 1653 # the object is corrupted. |
| 1629 self._lru.pop(digest) | 1654 self._lru.pop(digest, None) |
| 1655 self._dirty = True | |
| 1630 self._delete_file(digest, UNKNOWN_FILE_SIZE) | 1656 self._delete_file(digest, UNKNOWN_FILE_SIZE) |
| 1631 | 1657 |
| 1632 def getfileobj(self, digest): | 1658 def getfileobj(self, digest): |
| 1633 try: | 1659 try: |
| 1634 f = fs.open(self._path(digest), 'rb') | 1660 f = fs.open(self._path(digest), 'rb') |
| 1635 with self._lock: | 1661 with self._lock: |
| 1636 self._used.append(self._lru[digest]) | 1662 self._used.append(self._lru[digest][0]) |
| 1637 return f | 1663 return f |
| 1638 except IOError: | 1664 except IOError: |
| 1639 raise CacheMiss(digest) | 1665 raise CacheMiss(digest) |
| 1640 | 1666 |
| 1641 def write(self, digest, content): | 1667 def write(self, digest, content): |
| 1642 assert content is not None | 1668 assert content is not None |
| 1643 with self._lock: | 1669 with self._lock: |
| 1644 self._protected = self._protected or digest | 1670 self._protected = self._protected or digest |
| 1645 path = self._path(digest) | 1671 path = self._path(digest) |
| 1646 # A stale broken file may remain. It is possible for the file to have write | 1672 # A stale broken file may remain. It is possible for the file to have write |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 1658 file_path.try_remove(path) | 1684 file_path.try_remove(path) |
| 1659 raise | 1685 raise |
| 1660 # Make the file read-only in the cache. This has a few side-effects since | 1686 # Make the file read-only in the cache. This has a few side-effects since |
| 1661 # the file node is modified, so every directory entries to this file becomes | 1687 # the file node is modified, so every directory entries to this file becomes |
| 1662 # read-only. It's fine here because it is a new file. | 1688 # read-only. It's fine here because it is a new file. |
| 1663 file_path.set_read_only(path, True) | 1689 file_path.set_read_only(path, True) |
| 1664 with self._lock: | 1690 with self._lock: |
| 1665 self._add(digest, size) | 1691 self._add(digest, size) |
| 1666 return digest | 1692 return digest |
| 1667 | 1693 |
| 1694 def _cache_disk_size(self): | |
|
M-A Ruel
2016/10/13 20:51:12
can you move it aside cache_set? I feel it'd make
| |
| 1695 """Returns number of bytes that cache files take.""" | |
| 1696 return sum(size for (size, _) in self._lru.itervalues()) | |
| 1697 | |
| 1698 @staticmethod | |
| 1699 def _validate_state(state): | |
| 1700 """Raises ValueError if the given serialized state is invalid.""" | |
| 1701 if not isinstance(state, dict): | |
| 1702 raise ValueError('not a JSON object') | |
| 1703 | |
| 1704 version = state.get('version') | |
| 1705 if not isinstance(version, int): | |
| 1706 raise ValueError('version %r is not an integer' % (version,)) | |
| 1707 | |
| 1708 entries = state.get('entries') | |
| 1709 if not isinstance(entries, list): | |
| 1710 raise ValueError('entries is not an array') | |
| 1711 for entry in entries: | |
| 1712 try: | |
| 1713 if not isinstance(entry, list) or len(entry) != 2: | |
| 1714 raise ValueError('not a pair') | |
| 1715 if not isinstance(entry[0], basestring): | |
| 1716 raise ValueError('first item is not a string') | |
| 1717 | |
| 1718 pair_of_numbers = ( | |
| 1719 isinstance(entry[1], list) and | |
| 1720 len(entry[1]) == 2 and | |
| 1721 all(isinstance(x, (int, float)) for x in entry[1]) | |
| 1722 ) | |
| 1723 if not pair_of_numbers: | |
| 1724 raise ValueError('second item is not a pair of numbers') | |
| 1725 except ValueError as ex: | |
| 1726 raise ValueError('invalid entry %r: %s' % (entry, ex)) | |
| 1727 | |
| 1668 def _load(self): | 1728 def _load(self): |
| 1669 """Loads state of the cache from json file. | 1729 """Loads state of the cache from json file if it exists. |
| 1670 | 1730 |
| 1671 If cache_dir does not exist on disk, it is created. | 1731 If cache_dir does not exist on disk, it is created. |
| 1732 | |
| 1733 Raises ValueError if the state file is broken. | |
| 1672 """ | 1734 """ |
| 1673 self._lock.assert_locked() | 1735 self._lock.assert_locked() |
| 1674 | 1736 |
| 1675 if not fs.isfile(self.state_file): | 1737 if not fs.isfile(self.state_file): |
| 1676 if not os.path.isdir(self.cache_dir): | 1738 if not os.path.isdir(self.cache_dir): |
| 1677 fs.makedirs(self.cache_dir) | 1739 fs.makedirs(self.cache_dir) |
| 1678 else: | 1740 else: |
| 1679 # Load state of the cache. | 1741 # Load state of the cache. |
| 1680 try: | 1742 try: |
| 1681 self._lru = lru.LRUDict.load(self.state_file) | 1743 with open(self.state_file, 'r') as f: |
|
M-A Ruel
2016/10/13 20:51:12
'rb'
| |
| 1682 except ValueError as err: | 1744 state = json.load(f) |
| 1683 logging.error('Failed to load cache state: %s' % (err,)) | 1745 if not isinstance(state, dict): |
| 1684 # Don't want to keep broken state file. | 1746 # Possibly old format. |
|
M-A Ruel
2016/10/13 20:51:12
# Possibly old format. Hard reset with a clear cac
| |
| 1685 file_path.try_remove(self.state_file) | 1747 file_path.rmtree(self.cache_dir) |
| 1748 fs.makedirs(self.cache_dir) | |
| 1749 self._lru = collections.OrderedDict() | |
| 1750 else: | |
| 1751 self._validate_state(state) | |
| 1752 if state['version'] != 2: | |
| 1753 raise ValueError( | |
| 1754 'unsupported version %s, supported is 2' % (state['version'])) | |
| 1755 self._lru = collections.OrderedDict(state['entries']) | |
| 1756 self._dirty = False | |
| 1757 except (IOError, ValueError) as e: | |
| 1758 raise ValueError('Cannot load state file %s: %s' % (self.state_file, e)) | |
| 1759 | |
| 1686 self._trim() | 1760 self._trim() |
| 1687 # We want the initial cache size after trimming, i.e. what is readily | 1761 # We want the initial cache size after trimming, i.e. what is readily |
| 1688 # available. | 1762 # available. |
| 1689 self._initial_number_items = len(self._lru) | 1763 self._initial_number_items = len(self._lru) |
| 1690 self._initial_size = sum(self._lru.itervalues()) | 1764 self._initial_size = self._cache_disk_size() |
| 1691 if self._evicted: | 1765 if self._evicted: |
| 1692 logging.info( | 1766 logging.info( |
| 1693 'Trimming evicted items with the following sizes: %s', | 1767 'Trimming evicted items with the following sizes: %s', |
| 1694 sorted(self._evicted)) | 1768 sorted(self._evicted)) |
| 1695 | 1769 |
| 1696 def _save(self): | 1770 def _save(self): |
| 1697 """Saves the LRU ordering.""" | 1771 """Saves the LRU ordering if changed.""" |
| 1698 self._lock.assert_locked() | 1772 self._lock.assert_locked() |
| 1773 if not self._dirty: | |
| 1774 return | |
| 1775 | |
| 1699 if sys.platform != 'win32': | 1776 if sys.platform != 'win32': |
| 1700 d = os.path.dirname(self.state_file) | 1777 d = os.path.dirname(self.state_file) |
| 1701 if fs.isdir(d): | 1778 if fs.isdir(d): |
| 1702 # Necessary otherwise the file can't be created. | 1779 # Necessary otherwise the file can't be created. |
| 1703 file_path.set_read_only(d, False) | 1780 file_path.set_read_only(d, False) |
| 1781 | |
| 1704 if fs.isfile(self.state_file): | 1782 if fs.isfile(self.state_file): |
| 1705 file_path.set_read_only(self.state_file, False) | 1783 file_path.set_read_only(self.state_file, False) |
| 1706 self._lru.save(self.state_file) | 1784 with open(self.state_file, 'w') as f: |
|
M-A Ruel
2016/10/13 20:51:12
'wb'
| |
| 1785 contents = { | |
| 1786 'version': 2, | |
| 1787 'entries': self._lru.items(), | |
| 1788 } | |
| 1789 json.dump(contents, f) | |
|
M-A Ruel
2016/10/13 20:51:12
separators=(',',':')
will save a few valuable byt
| |
| 1790 self._dirty = False | |
| 1707 | 1791 |
| 1708 def _trim(self): | 1792 def _trim(self): |
| 1709 """Trims anything we don't know, make sure enough free space exists.""" | 1793 """Trims anything we don't know, make sure enough free space exists.""" |
| 1710 self._lock.assert_locked() | 1794 self._lock.assert_locked() |
| 1711 | 1795 |
| 1712 # Ensure maximum cache size. | 1796 # Ensure maximum cache size. |
| 1713 if self.policies.max_cache_size: | 1797 if self.policies.max_cache_size: |
| 1714 total_size = sum(self._lru.itervalues()) | 1798 total_size = self._cache_disk_size() |
| 1715 while total_size > self.policies.max_cache_size: | 1799 while total_size > self.policies.max_cache_size: |
| 1716 total_size -= self._remove_lru_file(True) | 1800 total_size -= self._remove_lru_file(True) |
| 1717 | 1801 |
| 1718 # Ensure maximum number of items in the cache. | 1802 # Ensure maximum number of items in the cache. |
| 1719 if self.policies.max_items and len(self._lru) > self.policies.max_items: | 1803 if self.policies.max_items and len(self._lru) > self.policies.max_items: |
| 1720 for _ in xrange(len(self._lru) - self.policies.max_items): | 1804 for _ in xrange(len(self._lru) - self.policies.max_items): |
| 1721 self._remove_lru_file(True) | 1805 self._remove_lru_file(True) |
| 1722 | 1806 |
| 1723 # Ensure enough free space. | 1807 # Ensure enough free space. |
| 1724 self._free_disk = file_path.get_free_space(self.cache_dir) | 1808 self._free_disk = file_path.get_free_space(self.cache_dir) |
| 1725 trimmed_due_to_space = 0 | 1809 trimmed_due_to_space = 0 |
| 1726 while ( | 1810 while ( |
| 1727 self.policies.min_free_space and | 1811 self.policies.min_free_space and |
| 1728 self._lru and | 1812 self._lru and |
| 1729 self._free_disk < self.policies.min_free_space): | 1813 self._free_disk < self.policies.min_free_space): |
| 1730 trimmed_due_to_space += 1 | 1814 trimmed_due_to_space += 1 |
| 1731 self._remove_lru_file(True) | 1815 self._remove_lru_file(True) |
| 1732 | 1816 |
| 1733 if trimmed_due_to_space: | 1817 if trimmed_due_to_space: |
| 1734 total_usage = sum(self._lru.itervalues()) | 1818 total_usage = self._cache_disk_size() |
| 1735 usage_percent = 0. | 1819 usage_percent = 0. |
| 1736 if total_usage: | 1820 if total_usage: |
| 1737 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size | 1821 usage_percent = 100. * float(total_usage) / self.policies.max_cache_size |
| 1738 | 1822 |
| 1739 logging.warning( | 1823 logging.warning( |
| 1740 'Trimmed %s file(s) due to not enough free disk space: %.1fkb free,' | 1824 'Trimmed %s file(s) due to not enough free disk space: %.1fkb free,' |
| 1741 ' %.1fkb cache (%.1f%% of its maximum capacity of %.1fkb)', | 1825 ' %.1fkb cache (%.1f%% of its maximum capacity of %.1fkb)', |
| 1742 trimmed_due_to_space, | 1826 trimmed_due_to_space, |
| 1743 self._free_disk / 1024., | 1827 self._free_disk / 1024., |
| 1744 total_usage / 1024., | 1828 total_usage / 1024., |
| 1745 usage_percent, | 1829 usage_percent, |
| 1746 self.policies.max_cache_size / 1024.) | 1830 self.policies.max_cache_size / 1024.) |
| 1747 self._save() | 1831 self._save() |
| 1748 | 1832 |
| 1749 def _path(self, digest): | 1833 def _path(self, digest): |
| 1750 """Returns the path to one item.""" | 1834 """Returns the path to one item.""" |
| 1751 return os.path.join(self.cache_dir, digest) | 1835 assert len(digest) > 2 |
| 1836 return os.path.join(self.cache_dir, digest[:2], digest[2:]) | |
| 1752 | 1837 |
| 1753 def _remove_lru_file(self, allow_protected): | 1838 def _remove_lru_file(self, allow_protected): |
| 1754 """Removes the lastest recently used file and returns its size.""" | 1839 """Removes the lastest recently used file and returns its size.""" |
|
M-A Ruel
2016/10/13 20:51:12
Probably a typo of mine; latest ?
| |
| 1755 self._lock.assert_locked() | 1840 self._lock.assert_locked() |
| 1756 try: | 1841 |
| 1757 digest, size = self._lru.get_oldest() | 1842 digest = None |
| 1758 if not allow_protected and digest == self._protected: | 1843 for key in self._lru: |
| 1759 raise Error('Not enough space to map the whole isolated tree') | 1844 digest = key |
| 1760 except KeyError: | 1845 break |
| 1846 if digest is None: | |
| 1761 raise Error('Nothing to remove') | 1847 raise Error('Nothing to remove') |
| 1762 digest, size = self._lru.pop_oldest() | 1848 |
| 1849 if not allow_protected and digest == self._protected: | |
| 1850 raise Error('Not enough space to map the whole isolated tree') | |
| 1851 | |
| 1852 digest, (size, _) = self._lru.popitem(last=False) | |
| 1853 self._dirty = True | |
| 1763 logging.debug("Removing LRU file %s", digest) | 1854 logging.debug("Removing LRU file %s", digest) |
| 1764 self._delete_file(digest, size) | 1855 self._delete_file(digest, size) |
| 1765 return size | 1856 return size |
| 1766 | 1857 |
| 1767 def _add(self, digest, size=UNKNOWN_FILE_SIZE): | 1858 def _add(self, digest, size=UNKNOWN_FILE_SIZE): |
| 1768 """Adds an item into LRU cache marking it as a newest one.""" | 1859 """Adds an item into LRU cache marking it as a newest one. |
| 1860 | |
| 1861 Asumes the file exists. | |
|
M-A Ruel
2016/10/13 20:51:12
Assumes
| |
| 1862 """ | |
| 1769 self._lock.assert_locked() | 1863 self._lock.assert_locked() |
| 1770 if size == UNKNOWN_FILE_SIZE: | 1864 if size == UNKNOWN_FILE_SIZE: |
| 1771 size = fs.stat(self._path(digest)).st_size | 1865 size = fs.stat(self._path(digest)).st_size |
| 1772 self._added.append(size) | 1866 self._added.append(size) |
| 1773 self._lru.add(digest, size) | 1867 self._lru.pop(digest, None) |
| 1868 self._lru[digest] = [size, self.time_fn()] | |
|
M-A Ruel
2016/10/13 20:51:12
self._lru[digest] = (size, self.time_fn())
it's s
| |
| 1869 self._dirty = True | |
| 1774 self._free_disk -= size | 1870 self._free_disk -= size |
| 1775 # Do a quicker version of self._trim(). It only enforces free disk space, | 1871 # Do a quicker version of self._trim(). It only enforces free disk space, |
| 1776 # not cache size limits. It doesn't actually look at real free disk space, | 1872 # not cache size limits. It doesn't actually look at real free disk space, |
| 1777 # only uses its cache values. self._trim() will be called later to enforce | 1873 # only uses its cache values. self._trim() will be called later to enforce |
| 1778 # real trimming but doing this quick version here makes it possible to map | 1874 # real trimming but doing this quick version here makes it possible to map |
| 1779 # an isolated that is larger than the current amount of free disk space when | 1875 # an isolated that is larger than the current amount of free disk space when |
| 1780 # the cache size is already large. | 1876 # the cache size is already large. |
| 1781 while ( | 1877 while ( |
| 1782 self.policies.min_free_space and | 1878 self.policies.min_free_space and |
| 1783 self._lru and | 1879 self._lru and |
| 1784 self._free_disk < self.policies.min_free_space): | 1880 self._free_disk < self.policies.min_free_space): |
| 1785 self._remove_lru_file(False) | 1881 self._remove_lru_file(False) |
| 1786 | 1882 |
| 1787 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE): | 1883 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE): |
| 1788 """Deletes cache file from the file system.""" | 1884 """Deletes cache file from the file system.""" |
| 1789 self._lock.assert_locked() | 1885 self._lock.assert_locked() |
| 1790 try: | 1886 try: |
| 1791 if size == UNKNOWN_FILE_SIZE: | 1887 if size == UNKNOWN_FILE_SIZE: |
| 1792 size = fs.stat(self._path(digest)).st_size | 1888 size = fs.stat(self._path(digest)).st_size |
| 1793 file_path.try_remove(self._path(digest)) | 1889 path = self._path(digest) |
| 1890 file_path.try_remove(path) | |
| 1794 self._evicted.append(size) | 1891 self._evicted.append(size) |
| 1795 self._free_disk += size | 1892 self._free_disk += size |
| 1893 | |
| 1894 parent = os.path.dirname(path) | |
| 1895 if len(os.listdir(parent)) == 0: | |
|
M-A Ruel
2016/10/13 20:51:12
I don't think it should be done. It's better to le
| |
| 1896 fs.rmtree(parent) | |
|
M-A Ruel
2016/10/13 20:51:12
I don't see a corresponding call to mkdir() to cre
| |
| 1796 except OSError as e: | 1897 except OSError as e: |
| 1797 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e)) | 1898 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e)) |
| 1798 | 1899 |
| 1799 | 1900 |
| 1800 class IsolatedBundle(object): | 1901 class IsolatedBundle(object): |
| 1801 """Fetched and parsed .isolated file with all dependencies.""" | 1902 """Fetched and parsed .isolated file with all dependencies.""" |
| 1802 | 1903 |
| 1803 def __init__(self): | 1904 def __init__(self): |
| 1804 self.command = [] | 1905 self.command = [] |
| 1805 self.files = {} | 1906 self.files = {} |
| (...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2431 return dispatcher.execute(OptionParserIsolateServer(), args) | 2532 return dispatcher.execute(OptionParserIsolateServer(), args) |
| 2432 | 2533 |
| 2433 | 2534 |
| 2434 if __name__ == '__main__': | 2535 if __name__ == '__main__': |
| 2435 subprocess42.inhibit_os_error_reporting() | 2536 subprocess42.inhibit_os_error_reporting() |
| 2436 fix_encoding.fix_encoding() | 2537 fix_encoding.fix_encoding() |
| 2437 tools.disable_buffering() | 2538 tools.disable_buffering() |
| 2438 colorama.init() | 2539 colorama.init() |
| 2439 file_path.enable_symlink() | 2540 file_path.enable_symlink() |
| 2440 sys.exit(main(sys.argv[1:])) | 2541 sys.exit(main(sys.argv[1:])) |
| OLD | NEW |