OLD | NEW |
(Empty) | |
| 1 |
# Cache implementation with a Least Recently Used (LRU) replacement policy and
# a basic dictionary interface.
| 4 |
| 5 # Copyright (C) 2006, 2009, 2010, 2011 Jay Hutchinson |
| 6 |
| 7 # This program is free software; you can redistribute it and/or modify it |
| 8 # under the terms of the GNU General Public License as published by the Free |
| 9 # Software Foundation; either version 2 of the License, or (at your option) |
| 10 # any later version. |
| 11 |
| 12 # This program is distributed in the hope that it will be useful, but WITHOUT |
| 13 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 14 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| 15 # more details. |
| 16 |
| 17 # You should have received a copy of the GNU General Public License along |
| 18 # with this program; if not, write to the Free Software Foundation, Inc., 51 |
| 19 # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 |
| 21 |
| 22 |
| 23 # The cache is implemented using a combination of a python dictionary (hash |
| 24 # table) and a circular doubly linked list. Items in the cache are stored in |
| 25 # nodes. These nodes make up the linked list. The list is used to efficiently |
| 26 # maintain the order that the items have been used in. The front or head of |
| 27 # the list contains the most recently used item, the tail of the list |
| 28 # contains the least recently used item. When an item is used it can easily |
| 29 # (in a constant amount of time) be moved to the front of the list, thus |
| 30 # updating its position in the ordering. These nodes are also placed in the |
| 31 # hash table under their associated key. The hash table allows efficient |
| 32 # lookup of values by key. |
| 33 |
| 34 # Class for the node objects. |
| 35 class _dlnode(object): |
| 36 def __init__(self): |
| 37 self.empty = True |
| 38 |
| 39 |
class lrucache(object):
    """A dict-like cache of bounded size with LRU replacement.

    Items are stored in the nodes of a circular doubly linked list and
    indexed by a hash table (self.table). self.head is the most recently
    used node; head.prev is the tail, i.e. the least recently used node.
    Empty (spare) nodes are always kept grouped together at the tail, so
    the tail node is the correct one to reuse for a new item whether or
    not the cache is full.
    """

    def __init__(self, size, callback=None):
        # Optional eviction hook, invoked as callback(key, value) just
        # before an item is pushed out of a full cache.
        self.callback = callback

        # Hash table mapping each key to its list node.
        self.table = {}

        # Seed the circular list with one node pointing at itself (the
        # capacity is always at least one), then grow it to the
        # requested capacity via size().
        self.head = _dlnode()
        self.head.next = self.head
        self.head.prev = self.head
        self.listSize = 1

        self.size(size)

    def __len__(self):
        return len(self.table)

    def clear(self):
        # Mark every occupied node empty and drop its key/value
        # references so the cached objects can be garbage collected.
        for node in self.dli():
            node.empty = True
            node.key = None
            node.value = None

        self.table.clear()

    def __contains__(self, key):
        return key in self.table

    def peek(self, key):
        """Return the value under key WITHOUT touching the LRU order.

        Raises KeyError when the key is absent.
        """
        return self.table[key].value

    def __getitem__(self, key):
        node = self.table[key]

        # Promote the node to the front of the list and make it the new
        # head, marking it as the most recently used entry.
        self.mtf(node)
        self.head = node

        return node.value

    def get(self, key, default=None):
        """Get an item - return default (None) if not present"""
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key, value):
        # Replacing an existing entry only updates its value and
        # promotes it to most recently used.
        if key in self.table:
            node = self.table[key]
            node.value = value
            self.mtf(node)
            self.head = node
            return

        # New entry: reuse the tail node (head.prev, since the list is
        # circular). When the cache is full the tail holds the least
        # recently used item; otherwise the tail is one of the empty
        # nodes grouped at the end. Either way it is the right choice.
        node = self.head.prev

        # Evict whatever the tail node currently holds.
        if not node.empty:
            if self.callback is not None:
                self.callback(node.key, node.value)
            del self.table[node.key]

        # Install the new item in the node and index it.
        node.empty = False
        node.key = key
        node.value = value
        self.table[key] = node

        # The tail already directly precedes the head, so simply
        # rotating the head back one node puts the new entry in front.
        self.head = node

    def __delitem__(self, key):
        # Remove the entry from the index and mark its node reusable.
        node = self.table.pop(key)
        node.empty = True

        # Not strictly necessary, but releases the references.
        node.key = None
        node.value = None

        # Recycle the node ahead of any non-empty node by moving it to
        # the tail: place it directly before the head, then advance the
        # head. The head adjustment keeps this correct even when the
        # deleted node was the head itself.
        self.mtf(node)
        self.head = node.next

    def __iter__(self):
        # Yield the keys from most recently to least recently used
        # without modifying the cache order.
        for node in self.dli():
            yield node.key

    def items(self):
        # Yield (key, value) pairs from most recently to least recently
        # used without modifying the cache order.
        for node in self.dli():
            yield (node.key, node.value)

    def keys(self):
        # Yield the keys from most recently to least recently used
        # without modifying the cache order.
        for node in self.dli():
            yield node.key

    def values(self):
        # Yield the values from most recently to least recently used
        # without modifying the cache order.
        for node in self.dli():
            yield node.value

    def size(self, size=None):
        """Return the cache capacity, first adjusting it to 'size' when
        one is given. The capacity must stay greater than zero."""
        if size is not None:
            assert size > 0
            if size > self.listSize:
                self.addTailNode(size - self.listSize)
            elif size < self.listSize:
                self.removeTailNode(self.listSize - size)

        return self.listSize

    def addTailNode(self, n):
        # Grow the capacity by splicing n fresh empty nodes in at the
        # tail, i.e. directly before the head of the circular list.
        for _ in range(n):
            node = _dlnode()
            node.next = self.head
            node.prev = self.head.prev

            self.head.prev.next = node
            self.head.prev = node

        self.listSize += n

    def removeTailNode(self, n):
        # Shrink the capacity by unlinking n nodes from the tail,
        # evicting (with callback) any items they still hold.
        assert self.listSize > n
        for _ in range(n):
            node = self.head.prev
            if not node.empty:
                if self.callback is not None:
                    self.callback(node.key, node.value)
                del self.table[node.key]

            # Splice the tail node out of the list.
            self.head.prev = node.prev
            node.prev.next = self.head

            # Not strictly necessary, but releases the references.
            node.prev = None
            node.next = None
            node.key = None
            node.value = None

        self.listSize -= n

    def mtf(self, node):
        # Unlink 'node', then relink it directly before the head. The
        # order of operations leaves the list unchanged when 'node'
        # already precedes the head or is the head itself.
        node.prev.next = node.next
        node.next.prev = node.prev

        node.prev = self.head.prev
        node.next = self.head.prev.next

        node.next.prev = node
        node.prev.next = node

    def dli(self):
        # Iterate over the non-empty nodes from most recently to least
        # recently used. The first len(self.table) nodes starting at the
        # head are exactly the occupied ones.
        node = self.head
        for _ in range(len(self.table)):
            yield node
            node = node.next
| 286 |
| 287 |
| 288 |
| 289 |
class WriteThroughCacheManager(object):
    """Pair a dict-like store with an LRU cache, write-through style.

    Every write goes to both the cache and the store, so the two can
    never disagree; reads are served from the cache when possible.
    """

    def __init__(self, store, size):
        self.store = store
        self.cache = lrucache(size)

    def __len__(self):
        # The store always holds the full key set.
        return len(self.store)

    def size(self, size=None):
        """Return (and optionally adjust) the managed cache's capacity."""
        return self.cache.size(size)

    def clear(self):
        self.cache.clear()
        self.store.clear()

    def __contains__(self, key):
        # Fast path: the cache. Otherwise fall back to the store.
        return key in self.cache or key in self.store

    def __getitem__(self, key):
        # Serve from the cache when possible. A KeyError here just means
        # a cache miss, so fall through to the store.
        try:
            return self.cache[key]
        except KeyError:
            pass

        # Cache miss: read from the store, populate the cache, return.
        value = self.store[key]
        self.cache[key] = value
        return value

    def get(self, key, default=None):
        """Get an item - return default (None) if not present"""
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key, value):
        # Write-through: update cache and store together.
        self.cache[key] = value
        self.store[key] = value

    def __delitem__(self, key):
        # Delete from the store first so an unknown key raises KeyError;
        # then drop any cached copy, ignoring a cache miss.
        del self.store[key]
        try:
            del self.cache[key]
        except KeyError:
            pass

    def __iter__(self):
        return self.keys()

    def keys(self):
        return self.store.keys()

    def values(self):
        return self.store.values()

    def items(self):
        return self.store.items()
| 367 |
| 368 |
| 369 |
class WriteBackCacheManager(object):
    """Pair a dict-like store with an LRU cache, write-back style.

    Writes land only in the cache and are recorded in a dirty set; they
    reach the store when the entry is ejected from the cache, or
    explicitly via sync()/flush(). Usable as a context manager: exiting
    the 'with' block calls sync().
    """

    def __init__(self, store, size):
        self.store = store

        # Keys whose cached value has not yet been written to the store.
        self.dirty = set()

        # Eviction hook: when the cache ejects a dirty entry, write it
        # back to the store and mark it clean.
        def callback(key, value):
            if key in self.dirty:
                self.store[key] = value
                self.dirty.remove(key)

        self.cache = lrucache(size, callback)

    def size(self, size=None):
        """Return (and optionally adjust) the managed cache's capacity."""
        return self.cache.size(size)

    def clear(self):
        self.cache.clear()
        self.dirty.clear()
        self.store.clear()

    def __contains__(self, key):
        # Fast path: the cache. Otherwise fall back to the store.
        if key in self.cache:
            return True
        if key in self.store:
            return True
        return False

    def __getitem__(self, key):
        # Serve from the cache when possible. A KeyError here just means
        # a cache miss, so fall through to the store.
        try:
            return self.cache[key]
        except KeyError:
            pass

        # Cache miss: read from the store, populate the cache, return.
        value = self.store[key]
        self.cache[key] = value
        return value

    def get(self, key, default=None):
        """Get an item - return default (None) if not present"""
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, key, value):
        # Write only to the cache and remember the pending write-back.
        self.cache[key] = value
        self.dirty.add(key)

    def __delitem__(self, key):
        # The key may live in the cache, the store, or both; removing it
        # from either one counts as a successful delete.
        found = False
        try:
            del self.cache[key]
            # Dirty keys are always cache-resident, so dropping the
            # cached entry also retires any pending write-back. discard
            # (unlike remove) is a no-op for clean keys.
            self.dirty.discard(key)
            found = True
        except KeyError:
            pass

        try:
            del self.store[key]
            found = True
        except KeyError:
            pass

        if not found:
            # Include the key so the failure is diagnosable.
            raise KeyError(key)

    def __iter__(self):
        return self.keys()

    def keys(self):
        # Clean keys come from the store, dirty keys from the dirty set;
        # a key present in both is yielded once (via the dirty set).
        for key in self.store.keys():
            if key not in self.dirty:
                yield key

        for key in self.dirty:
            yield key

    def values(self):
        for key, value in self.items():
            yield value

    def items(self):
        # Clean pairs come from the store; dirty pairs are peeked from
        # the cache so iteration does not disturb the LRU order.
        for key, value in self.store.items():
            if key not in self.dirty:
                yield (key, value)

        for key in self.dirty:
            value = self.cache.peek(key)
            yield (key, value)

    def sync(self):
        """Write every dirty entry back to the store without changing
        the cache order, then mark everything clean."""
        for key in self.dirty:
            self.store[key] = self.cache.peek(key)
        self.dirty.clear()

    def flush(self):
        """Sync pending writes, then empty the cache."""
        self.sync()
        self.cache.clear()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always sync on exit; never suppress the in-flight exception.
        self.sync()
        return False
| 502 |
| 503 |
class FunctionCacheManager(object):
    """Memoize calls to func(*args, **kwargs) through an LRU cache."""

    def __init__(self, func, size):
        self.func = func
        self.cache = lrucache(size)

    def size(self, size=None):
        """Return (and optionally adjust) the cache's capacity."""
        return self.cache.size(size)

    def clear(self):
        self.cache.clear()

    def __call__(self, *args, **kwargs):
        # Build a hashable key: the positional arguments plus the
        # keyword arguments in a canonical (sorted) order.
        kwtuple = tuple((name, kwargs[name]) for name in sorted(kwargs.keys()))
        key = (args, kwtuple)

        # A KeyError just means the result is not cached yet.
        try:
            return self.cache[key]
        except KeyError:
            pass

        # Compute, remember, and return the result.
        value = self.func(*args, **kwargs)
        self.cache[key] = value
        return value
| 526 |
| 527 |
def lruwrap(store, size, writeback=False):
    """Wrap 'store' in an LRU cache manager of the given size.

    With writeback=True, updates are deferred until eviction or sync()
    (WriteBackCacheManager); otherwise every update is written straight
    through to the store (WriteThroughCacheManager).
    """
    manager = WriteBackCacheManager if writeback else WriteThroughCacheManager
    return manager(store, size)
| 533 |
| 534 import functools |
| 535 |
class lrudecorator(object):
    """Decorator that memoizes a function through a shared LRU cache.

    The cache and its management methods are exposed on the wrapped
    function as .cache, .size, and .clear.
    """

    def __init__(self, size):
        self.cache = lrucache(size)

    def __call__(self, func):
        def wrapper(*args, **kwargs):
            # Build a hashable key: the positional arguments plus the
            # keyword arguments in a canonical (sorted) order.
            kwtuple = tuple((name, kwargs[name]) for name in sorted(kwargs.keys()))
            key = (args, kwtuple)

            # A KeyError just means the result is not cached yet.
            try:
                return self.cache[key]
            except KeyError:
                pass

            # Compute, remember, and return the result.
            value = func(*args, **kwargs)
            self.cache[key] = value
            return value

        # Expose the cache and its management methods on the wrapper.
        wrapper.cache = self.cache
        wrapper.size = self.cache.size
        wrapper.clear = self.cache.clear
        return functools.update_wrapper(wrapper, func)
OLD | NEW |