| OLD | NEW |
| 1 # Copyright 2013 The LUCI Authors. All rights reserved. | 1 # Copyright 2013 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. |
| 4 | 4 |
| 5 """Defines a dictionary that can evict least recently used items.""" | 5 """Defines a dictionary that can evict least recently used items.""" |
| 6 | 6 |
| 7 import collections | 7 import collections |
| 8 import json | 8 import json |
| 9 import time |
| 9 | 10 |
| 10 | 11 |
| 11 class LRUDict(object): | 12 class LRUDict(object): |
| 12 """Dictionary that can evict least recently used items. | 13 """Dictionary that can evict least recently used items. |
| 13 | 14 |
| 14 Implemented as a wrapper around OrderedDict object. An OrderedDict stores | 15 Implemented as a wrapper around OrderedDict object. An OrderedDict stores |
| 15 (key, value) pairs in order they are inserted and can effectively pop oldest | 16 (key, (value, timestamp)) pairs in order they are |
| 16 items. | 17 inserted and can effectively pop oldest items. |
| 17 | 18 |
| 18 Can also store its state as *.json file on disk. | 19 Can also store its state as *.json file on disk. |
| 19 """ | 20 """ |
| 20 | 21 |
| 22 # Used to determine current timestamp. |
| 23 # Can be substituted in individual LRUDict instances. |
| 24 time_fn = time.time |
| 25 |
| 21 def __init__(self): | 26 def __init__(self): |
| 22 # Ordered key -> value mapping, newest items at the bottom. | 27 # Ordered key -> (value, timestamp) mapping, |
| 28 # newest items at the bottom. |
| 23 self._items = collections.OrderedDict() | 29 self._items = collections.OrderedDict() |
| 24 # True if was modified after loading. | 30 # True if was modified after loading. |
| 25 self._dirty = True | 31 self._dirty = True |
| 26 | 32 |
| 27 def __nonzero__(self): | 33 def __nonzero__(self): |
| 28 """False if dict is empty.""" | 34 """False if dict is empty.""" |
| 29 return bool(self._items) | 35 return bool(self._items) |
| 30 | 36 |
| 31 def __iter__(self): | 37 def __iter__(self): |
| 32 """Iterate over the keys.""" | 38 """Iterate over the keys.""" |
| 33 return self._items.__iter__() | 39 return self._items.__iter__() |
| 34 | 40 |
| 35 def __len__(self): | 41 def __len__(self): |
| 36 """Number of items in the dict.""" | 42 """Number of items in the dict.""" |
| 37 return len(self._items) | 43 return len(self._items) |
| 38 | 44 |
| 39 def __contains__(self, key): | 45 def __contains__(self, key): |
| 40 """True if |key| is in the dict.""" | 46 """True if |key| is in the dict.""" |
| 41 return key in self._items | 47 return key in self._items |
| 42 | 48 |
| 43 def __getitem__(self, key): | 49 def __getitem__(self, key): |
| 44 """Returns value for |key| or raises KeyError if not found.""" | 50 """Returns value for |key| or raises KeyError if not found.""" |
| 45 return self._items[key] | 51 return self._items[key][0] |
| 46 | 52 |
| 47 @classmethod | 53 @classmethod |
| 48 def load(cls, state_file): | 54 def load(cls, state_file): |
| 49 """Loads previously saved state and returns LRUDict in that state. | 55 """Loads previously saved state and returns LRUDict in that state. |
| 50 | 56 |
| 51 Raises ValueError if state file is corrupted. | 57 Raises ValueError if state file is corrupted. |
| 52 """ | 58 """ |
| 53 try: | 59 try: |
| 54 state = json.load(open(state_file, 'r')) | 60 state = json.load(open(state_file, 'r')) |
| 55 except (IOError, ValueError) as e: | 61 except (IOError, ValueError) as e: |
| 56 raise ValueError('Broken state file %s: %s' % (state_file, e)) | 62 raise ValueError('Broken state file %s: %s' % (state_file, e)) |
| 57 | 63 |
| 58 if not isinstance(state, list): | 64 if isinstance(state, list): # Old format. |
| 65 # Items are stored oldest to newest. Put them back in the same order. |
| 66 lru = cls() |
| 67 for pair in state: |
| 68 if not isinstance(pair, (list, tuple)) or len(pair) != 2: |
| 69 raise ValueError( |
| 70 'Broken state file %s, expecting pairs: %s' % (state_file, pair)) |
| 71 lru._items[pair[0]] = (pair[1], 0) |
| 72 |
| 73 # Check for duplicate keys. |
| 74 if len(lru) != len(state): |
| 75 raise ValueError( |
| 76 'Broken state file %s, found duplicate keys' % (state_file,)) |
| 77 |
| 78 elif isinstance(state, dict): # New format. |
| 79 state_ver = state.get('version') |
| 80 if not isinstance(state_ver, int): |
| 81 raise ValueError( |
| 82 'Broken state file %s, version %r is not an integer' % ( |
| 83 state_file, state_ver)) |
| 84 if state_ver > 2: |
| 85 raise ValueError( |
| 86 'Unsupported state file %s, version is %d. ' |
| 87 'Latest supported is 2' % (state_file, state_ver)) |
| 88 state_items = state.get('items') |
| 89 if not isinstance(state_items, list): |
| 90 raise ValueError( |
| 91 'Broken state file %s, items should be json list' % (state_file,)) |
| 92 lru = cls() |
| 93 # Items are stored oldest to newest. Put them back in the same order. |
| 94 for item in state_items: |
| 95 if not isinstance(item, (list, tuple)) or len(item) != 2: |
| 96 raise ValueError( |
| 97 'Broken state file %s, expecting pairs: %s' % (state_file, item)) |
| 98 if not isinstance(item[1], (list, tuple)) or len(item[1]) != 2: |
| 99 raise ValueError( |
| 100 'Broken state file %s, expecting second item to be a item: %s' % ( |
| 101 state_file, item)) |
| 102 if not isinstance(item[1][1], (int, float)): |
| 103 raise ValueError( |
| 104 'Broken state file %s, expecting second item of the second item ' |
| 105 'to be a number: %s' % (state_file, item)) |
| 106 lru._items[item[0]] = item[1] |
| 107 |
| 108 # Check for duplicate keys. |
| 109 if len(lru) != len(state_items): |
| 110 raise ValueError( |
| 111 'Broken state file %s, found duplicate keys' % (state_file,)) |
| 112 |
| 113 else: |
| 59 raise ValueError( | 114 raise ValueError( |
| 60 'Broken state file %s, should be json list' % (state_file,)) | 115 'Broken state file %s, should be json object or list' % (state_file,)) |
| 61 | |
| 62 # Items are stored oldest to newest. Put them back in the same order. | |
| 63 lru = cls() | |
| 64 for pair in state: | |
| 65 if not isinstance(pair, (list, tuple)) or len(pair) != 2: | |
| 66 raise ValueError( | |
| 67 'Broken state file %s, expecting pairs: %s' % (state_file, pair)) | |
| 68 lru.add(pair[0], pair[1]) | |
| 69 | |
| 70 # Check for duplicate keys. | |
| 71 if len(lru) != len(state): | |
| 72 raise ValueError( | |
| 73 'Broken state file %s, found duplicate keys' % (state_file,)) | |
| 74 | 116 |
| 75 # Now state from the file corresponds to state in the memory. | 117 # Now state from the file corresponds to state in the memory. |
| 76 lru._dirty = False | 118 lru._dirty = False |
| 77 return lru | 119 return lru |
| 78 | 120 |
| 79 def save(self, state_file): | 121 def save(self, state_file): |
| 80 """Saves cache state to a file if it was modified.""" | 122 """Saves cache state to a file if it was modified.""" |
| 81 if not self._dirty: | 123 if not self._dirty: |
| 82 return False | 124 return False |
| 83 | 125 |
| 84 with open(state_file, 'wb') as f: | 126 with open(state_file, 'wb') as f: |
| 85 json.dump(self._items.items(), f, separators=(',',':')) | 127 contents = { |
| 128 'version': 2, |
| 129 'items': self._items.items(), |
| 130 } |
| 131 json.dump(contents, f, separators=(',',':')) |
| 86 | 132 |
| 87 self._dirty = False | 133 self._dirty = False |
| 88 return True | 134 return True |
| 89 | 135 |
| 90 def add(self, key, value): | 136 def add(self, key, value): |
| 91 """Adds or replaces a |value| for |key|, marks it as most recently used.""" | 137 """Adds or replaces a |value| for |key|, marks it as most recently used.""" |
| 92 self._items.pop(key, None) | 138 self._items.pop(key, None) |
| 93 self._items[key] = value | 139 self._items[key] = (value, self.time_fn()) |
| 94 self._dirty = True | |
| 95 | |
| 96 def batch_insert_oldest(self, items): | |
| 97 """Prepends list of |items| to the dict, marks them as least recently used. | |
| 98 | |
| 99 |items| is a list of (key, value) pairs to add. | |
| 100 | |
| 101 It's a very slow operation that completely rebuilds the dictionary. | |
| 102 """ | |
| 103 new_items = collections.OrderedDict() | |
| 104 | |
| 105 # Insert |items| first, so they became oldest. | |
| 106 for key, value in items: | |
| 107 new_items[key] = value | |
| 108 | |
| 109 # Insert the rest, be careful not to override keys from |items|. | |
| 110 for key, value in self._items.iteritems(): | |
| 111 if key not in new_items: | |
| 112 new_items[key] = value | |
| 113 | |
| 114 self._items = new_items | |
| 115 self._dirty = True | 140 self._dirty = True |
| 116 | 141 |
| 117 def keys_set(self): | 142 def keys_set(self): |
| 118 """Set of keys of items in this dict.""" | 143 """Set of keys of items in this dict.""" |
| 119 return set(self._items) | 144 return set(self._items) |
| 120 | 145 |
| 121 def get(self, key, default=None): | 146 def get(self, key, default=None): |
| 122 """Returns value for |key| or |default| if not found.""" | 147 """Returns value for |key| or |default| if not found.""" |
| 123 return self._items.get(key, default) | 148 item = self._items.get(key) |
| 149 return item[0] if item is not None else default |
| 124 | 150 |
| 125 def touch(self, key): | 151 def touch(self, key): |
| 126 """Marks |key| as most recently used. | 152 """Marks |key| as most recently used. |
| 127 | 153 |
| 128 Raises KeyError if |key| is not in the dict. | 154 Raises KeyError if |key| is not in the dict. |
| 129 """ | 155 """ |
| 130 self._items[key] = self._items.pop(key) | 156 self._items[key] = (self._items.pop(key)[0], self.time_fn()) |
| 131 self._dirty = True | 157 self._dirty = True |
| 132 | 158 |
| 133 def pop(self, key): | 159 def pop(self, key): |
| 134 """Removes item from the dict, returns its value. | 160 """Removes item from the dict, returns its value. |
| 135 | 161 |
| 136 Raises KeyError if |key| is not in the dict. | 162 Raises KeyError if |key| is not in the dict. |
| 137 """ | 163 """ |
| 138 value = self._items.pop(key) | 164 item = self._items.pop(key) |
| 139 self._dirty = True | 165 self._dirty = True |
| 140 return value | 166 return item[0] |
| 141 | 167 |
| 142 def get_oldest(self): | 168 def get_oldest(self): |
| 143 """Returns oldest item as tuple (key, value). | 169 """Returns oldest item as tuple (key, (value, timestamp)). |
| 144 | 170 |
| 145 Raises KeyError if dict is empty. | 171 Raises KeyError if dict is empty. |
| 146 """ | 172 """ |
| 147 for i in self._items.iteritems(): | 173 for item in self._items.iteritems(): |
| 148 return i | 174 return item |
| 149 raise KeyError('dictionary is empty') | 175 raise KeyError('dictionary is empty') |
| 150 | 176 |
| 151 def pop_oldest(self): | 177 def pop_oldest(self): |
| 152 """Removes oldest item from the dict and returns it as tuple (key, value). | 178 """Removes oldest item and returns it as (key, (value, timestamp)). |
| 153 | 179 |
| 154 Raises KeyError if dict is empty. | 180 Raises KeyError if dict is empty. |
| 155 """ | 181 """ |
| 156 pair = self._items.popitem(last=False) | 182 item = self._items.popitem(last=False) |
| 157 self._dirty = True | 183 self._dirty = True |
| 158 return pair | 184 return item |
| 159 | 185 |
| 160 def itervalues(self): | 186 def itervalues(self): |
| 161 """Iterator over stored values in arbitrary order.""" | 187 """Iterator over stored values in arbitrary order.""" |
| 162 return self._items.itervalues() | 188 for val, _ in self._items.itervalues(): |
| 189 yield val |
| OLD | NEW |