Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """Takes care of manipulating the chrome's HTTP cache. | 5 """Takes care of manipulating the chrome's HTTP cache. |
| 6 """ | 6 """ |
| 7 | 7 |
| 8 from datetime import datetime | 8 from datetime import datetime |
| 9 import json | 9 import json |
| 10 import os | 10 import os |
| 11 import re | 11 import re |
| 12 import shutil | 12 import shutil |
| 13 import struct | |
| 13 import subprocess | 14 import subprocess |
| 14 import sys | 15 import sys |
| 15 import tempfile | 16 import tempfile |
| 16 import zipfile | 17 import zipfile |
| 17 | 18 |
| 18 _SRC_DIR = os.path.abspath(os.path.join( | 19 _SRC_DIR = os.path.abspath(os.path.join( |
| 19 os.path.dirname(__file__), '..', '..', '..')) | 20 os.path.dirname(__file__), '..', '..', '..')) |
| 20 | 21 |
| 21 sys.path.append(os.path.join(_SRC_DIR, 'build', 'android')) | 22 sys.path.append(os.path.join(_SRC_DIR, 'build', 'android')) |
| 22 from pylib import constants | 23 from pylib import constants |
| 23 | 24 |
| 24 import device_setup | 25 import device_setup |
| 25 import options | 26 import options |
| 26 | 27 |
| 27 | 28 |
| 28 OPTIONS = options.OPTIONS | 29 OPTIONS = options.OPTIONS |
| 29 | 30 |
| 30 | 31 |
class CacheBackendType(object):
  """Cache back-end type names understood by the cachetool binary."""
  # Upper-case names per the style guide (review request); the original
  # mixed-case names are kept as backward-compatible aliases.
  SIMPLE = Simple = 'simple'
  BLOCKFILE = Blockfile = 'blockfile'


# Cache back-end types supported by cachetool.
BACKEND_TYPES = {CacheBackendType.SIMPLE, CacheBackendType.BLOCKFILE}

# Regex used to parse HTTP headers line by line.
HEADER_PARSING_REGEX = re.compile(r'^(?P<header>\S+):(?P<value>.*)$')
| 36 | 41 |
| 37 | 42 |
| 38 def _EnsureCleanCacheDirectory(directory_dest_path): | 43 def _EnsureCleanCacheDirectory(directory_dest_path): |
| 39 """Ensure that a cache directory is created and clean. | 44 """Ensure that a cache directory is created and clean. |
| 40 | 45 |
| 41 Args: | 46 Args: |
| 42 directory_dest_path: Path of the cache directory to ensure cleanliness. | 47 directory_dest_path: Path of the cache directory to ensure cleanliness. |
| (...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 225 | 230 |
| 226 Args: | 231 Args: |
| 227 directory_src_path: Path of the cache directory source. | 232 directory_src_path: Path of the cache directory source. |
| 228 directory_dest_path: Path of the cache directory destination. | 233 directory_dest_path: Path of the cache directory destination. |
| 229 """ | 234 """ |
| 230 assert os.path.isdir(directory_src_path) | 235 assert os.path.isdir(directory_src_path) |
| 231 _EnsureCleanCacheDirectory(directory_dest_path) | 236 _EnsureCleanCacheDirectory(directory_dest_path) |
| 232 shutil.copytree(directory_src_path, directory_dest_path) | 237 shutil.copytree(directory_src_path, directory_dest_path) |
| 233 | 238 |
| 234 | 239 |
class CacheBackendError(Exception):
  """Raised when one or more cachetool instructions fail.

  Attributes:
    errors: List of (instruction, error message) pairs.
  """

  def __init__(self, errors):
    super(CacheBackendError, self).__init__(repr(errors))
    self.errors = errors
| 245 | |
| 235 class CacheBackend(object): | 246 class CacheBackend(object): |
| 236 """Takes care of reading and deleting cached keys. | 247 """Takes care of reading and deleting cached keys. |
|
mattcary
2016/07/01 12:09:29
Add that this can be used as a context manager now
gabadie
2016/07/01 14:10:25
Done.
| |
| 237 """ | 248 """ |
| 238 | 249 |
  def __init__(self, cache_directory_path, cache_backend_type):
    """Chrome cache back-end constructor.

    Args:
      cache_directory_path: The directory path where the cache is locally
          stored.
      cache_backend_type: A cache back-end type in BACKEND_TYPES.
    """
    assert os.path.isdir(cache_directory_path)
    assert cache_backend_type in BACKEND_TYPES
    self._cache_directory_path = cache_directory_path
    self._cache_backend_type = cache_backend_type
    # Make sure cache_directory_path is a valid cache.
    # NOTE(review): 'stop' appears to be used here as a cheap command that
    # fails on an invalid cache directory — confirm against cachetool.
    self._CachetoolCmd('stop')
| 254 def GetSize(self): | 265 def GetSize(self): |
| 255 """Gets total size of cache entries in bytes.""" | 266 """Gets total size of cache entries in bytes.""" |
| 256 size = self._CachetoolCmd('get_size') | 267 size = self._CachetoolCmd('get_size') |
| 257 return int(size.strip()) | 268 return int(size.strip()) |
| 258 | 269 |
| 259 def ListKeys(self): | 270 def ListKeys(self): |
| 260 """Lists cache's keys. | 271 """Lists cache's keys. |
| 261 | 272 |
| 262 Returns: | 273 Returns: |
| 263 A list of all keys stored in the cache. | 274 A list of all keys stored in the cache. |
| 264 """ | 275 """ |
| 265 return [k.strip() for k in self._CachetoolCmd('list_keys').split('\n')[:-1]] | 276 return [k.strip() for k in self._CachetoolCmd('list_keys').split('\n')[:-2]] |
|
mattcary
2016/07/01 12:09:29
This -2 is a magic constant that's a little worrisome.
gabadie
2016/07/01 14:10:26
Oh this change is because of an implementation change in cachetool.
| |
| 266 | 277 |
| 267 def GetStreamForKey(self, key, index): | 278 def GetStreamForKey(self, key, index): |
| 268 """Gets a key's stream. | 279 """Gets a key's stream. |
| 269 | 280 |
| 270 Args: | 281 Args: |
| 271 key: The key to access the stream. | 282 key: The key to access the stream. |
| 272 index: The stream index: | 283 index: The stream index: |
| 273 index=0 is the HTTP response header; | 284 index=0 is the HTTP response header; |
| 274 index=1 is the transport encoded content; | 285 index=1 is the transport encoded content; |
| 275 index=2 is the compiled content. | 286 index=2 is the compiled content. |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 300 """Runs the cache editor tool and return the stdout. | 311 """Runs the cache editor tool and return the stdout. |
| 301 | 312 |
| 302 Args: | 313 Args: |
| 303 operation: Cachetool operation. | 314 operation: Cachetool operation. |
| 304 args: Additional operation argument to append to the command line. | 315 args: Additional operation argument to append to the command line. |
| 305 stdin: String to pipe to the Cachetool's stdin. | 316 stdin: String to pipe to the Cachetool's stdin. |
| 306 | 317 |
| 307 Returns: | 318 Returns: |
| 308 Cachetool's stdout string. | 319 Cachetool's stdout string. |
| 309 """ | 320 """ |
| 321 args = args or [] | |
| 310 editor_tool_cmd = [ | 322 editor_tool_cmd = [ |
| 311 OPTIONS.LocalBinary('cachetool'), | 323 OPTIONS.LocalBinary('cachetool'), |
| 312 self._cache_directory_path, | 324 self._cache_directory_path, |
| 313 self._cache_backend_type, | 325 self._cache_backend_type, |
| 314 operation] | 326 operation] + args |
| 315 editor_tool_cmd.extend(args or []) | 327 process = subprocess.Popen(editor_tool_cmd, stdout=subprocess.PIPE, |
| 316 process = subprocess.Popen( | 328 stderr=subprocess.PIPE, stdin=subprocess.PIPE) |
| 317 editor_tool_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE) | 329 stdout_data, stderr_data = process.communicate(input=stdin) |
| 318 stdout_data, _ = process.communicate(input=stdin) | 330 if process.returncode != 0: |
| 319 assert process.returncode == 0 | 331 raise CacheBackendError([([operation] + args, stderr_data.strip())]) |
| 320 return stdout_data | 332 return stdout_data |
| 321 | 333 |
| 322 def UpdateRawResponseHeaders(self, key, raw_headers): | 334 def UpdateRawResponseHeaders(self, key, raw_headers): |
| 323 """Updates a key's raw response headers. | 335 """Updates a key's raw response headers. |
| 324 | 336 |
| 325 Args: | 337 Args: |
| 326 key: The key to modify. | 338 key: The key to modify. |
| 327 raw_headers: Raw response headers to set. | 339 raw_headers: Raw response headers to set. |
| 328 """ | 340 """ |
| 329 self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers) | 341 self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers) |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 357 | 369 |
| 358 cmd = [OPTIONS.LocalBinary('content_decoder_tool')] | 370 cmd = [OPTIONS.LocalBinary('content_decoder_tool')] |
| 359 cmd.extend([s.strip() for s in content_encoding.split(',')]) | 371 cmd.extend([s.strip() for s in content_encoding.split(',')]) |
| 360 process = subprocess.Popen(cmd, | 372 process = subprocess.Popen(cmd, |
| 361 stdin=subprocess.PIPE, | 373 stdin=subprocess.PIPE, |
| 362 stdout=subprocess.PIPE) | 374 stdout=subprocess.PIPE) |
| 363 decoded_content, _ = process.communicate(input=encoded_content) | 375 decoded_content, _ = process.communicate(input=encoded_content) |
| 364 assert process.returncode == 0 | 376 assert process.returncode == 0 |
| 365 return decoded_content | 377 return decoded_content |
| 366 | 378 |
  def __enter__(self):
    # Allows CacheBackend to be used as a context manager.
    return self
| 381 | |
  def __exit__(self, exc_type, exc_val, exc_tb):
    # Nothing to clean up; implicit None return does not suppress exceptions.
    del exc_type, exc_val, exc_tb  # Unused.
|
mattcary
2016/07/01 12:09:29
Shouldn't you return True if you're going to suppress the exception?
gabadie
2016/07/01 14:10:26
return False (the implicit return None was already
| |
| 384 | |
| 385 | |
class OnlineCacheBackend(object):
  """Reads and deletes cached keys through a long-lived cachetool process.

  Unlike CacheBackend, which spawns one cachetool process per operation, this
  back-end keeps a single `cachetool ... online` subprocess alive and streams
  binary-encoded instructions to its stdin, reading results back from its
  stdout. Instructions may be batched: call Sync() to wait for all in-flight
  instructions, or construct with auto_sync=True to synchronize after each
  pushed instruction.

  Intended to be used as a context manager:
    with OnlineCacheBackend(path, CacheBackendType.Simple) as backend:
      keys = backend.ListKeys()
  """

  # Instruction opcodes understood by `cachetool ... online`.
  _INST_IDS = {
      'stop': 0,
      'get_size': 1,
      'list_keys': 2,
      'get_stream_for_key': 3,
      'delete_stream': 4,
      'delete_key': 5,
      'update_raw_headers': 6
  }

  def __init__(self, cache_directory_path, cache_backend_type, auto_sync=False):
    """Constructor.

    Args:
      cache_directory_path: The directory path where the cache is locally
          stored.
      cache_backend_type: A cache back-end type in BACKEND_TYPES.
      auto_sync: If True, Sync() is called after each pushed instruction.
    """
    assert os.path.isdir(cache_directory_path)
    assert cache_backend_type in BACKEND_TYPES
    self._cache_directory_path = cache_directory_path
    self._cache_backend_type = cache_backend_type
    self._in_flight_insts = []
    self._cachetool_process = None
    self._auto_sync = auto_sync

  def __enter__(self):
    self.Start()
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    del exc_val, exc_tb  # Unused.
    # On a back-end error the subprocess may be in a broken state: kill it
    # rather than sending a 'stop' instruction it might never answer.
    self.Stop(force_stop=exc_type == CacheBackendError)

  def GetSize(self):
    """Returns the total size of cache entries, in bytes."""
    self._PushInsts('get_size')
    self.Sync()
    return self._UnpackResult('i')[0]

  def ListKeys(self):
    """Returns the list of all keys stored in the cache."""
    self._PushInsts('list_keys')
    self.Sync()
    keys = []
    while True:
      key_size = self._UnpackResult('i')[0]
      if key_size == 0:
        # A zero-length key terminates the list.
        break
      keys.append(self._UnpackResult('{}s'.format(key_size))[0])
    return keys

  def GetStreamForKey(self, key, index):
    """Returns the content of stream |index| of |key|."""
    # Bug fix: this previously pushed 'update_raw_headers' instead of
    # 'get_stream_for_key'.
    self._PushInsts('get_stream_for_key', str(key), index)
    self.Sync()
    stream_size = self._UnpackResult('i')[0]
    return self._UnpackResult('{}s'.format(stream_size))[0]

  def DeleteStreamForKey(self, key, index):
    """Deletes stream |index| of |key|."""
    self._PushInsts('delete_stream', str(key), index)

  def DeleteKey(self, key):
    """Deletes |key| from the cache."""
    self._PushInsts('delete_key', str(key))

  def UpdateRawResponseHeaders(self, key, raw_headers):
    """Sets |key|'s raw response headers to |raw_headers|."""
    self._PushInsts('update_raw_headers', str(key), raw_headers)

  def Start(self):
    """Starts the cachetool subprocess in 'online' mode."""
    assert self._cachetool_process is None
    cache_tool_cmd = [
        OPTIONS.LocalBinary('cachetool'),
        self._cache_directory_path,
        self._cache_backend_type,
        'online']
    # Bug fix: the previous version also created two os.pipe() pairs that
    # were never connected to the subprocess (which uses subprocess.PIPE),
    # leaking file descriptors that Stop() then closed.
    self._cachetool_process = subprocess.Popen(
        cache_tool_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    assert not self._in_flight_insts

  def Stop(self, force_stop=False):
    """Stops the cachetool subprocess.

    Args:
      force_stop: If True, kill the subprocess instead of asking it to stop
          gracefully with a 'stop' instruction.
    """
    assert self._cachetool_process is not None
    if force_stop:
      self._cachetool_process.kill()
      self._cachetool_process.wait()
      del self._in_flight_insts[:]
    else:
      self._PushInsts('stop')
      self.Sync()
      self._cachetool_process.wait()
      assert not self._in_flight_insts
      assert self._cachetool_process.returncode == 0
    self._cachetool_process = None

  def Sync(self):
    """Waits for all in-flight instructions, raising on any failure.

    Raises:
      CacheBackendError: If any in-flight instruction reported an error.
    """
    self._PullInstsResults(len(self._in_flight_insts))

  def _PushInsts(self, inst_name, *args):
    """Encodes one instruction and writes it to the subprocess' stdin.

    Args:
      inst_name: A key of _INST_IDS.
      *args: int or str instruction parameters.
    """
    assert self._cachetool_process is not None
    inst_id = self._INST_IDS[inst_name]
    inst_code = struct.pack('b', inst_id)
    for param in args:
      if isinstance(param, int):
        inst_code += struct.pack('i', param)
      elif isinstance(param, str):
        # Strings are length-prefixed.
        inst_code += struct.pack('i{}s'.format(len(param)), len(param), param)
      else:
        assert False, 'Couldn\'t passdown parameter: {}'.format(repr(param))
    self._cachetool_process.stdin.write(inst_code)
    # Flush so the subprocess sees the instruction without waiting for the
    # Python-side buffer to fill.
    self._cachetool_process.stdin.flush()
    self._in_flight_insts.append([inst_name] + list(args))
    if self._auto_sync:
      assert len(self._in_flight_insts) == 1
      self.Sync()

  def _UnpackResult(self, fmt):
    """Reads and unpacks one struct of format |fmt| from the subprocess."""
    buf_size = struct.calcsize(fmt)
    return struct.unpack(fmt, self._cachetool_process.stdout.read(buf_size))

  def _PullInstsResults(self, count):
    """Reads completion statuses for the oldest |count| in-flight instructions.

    Raises:
      CacheBackendError: If any instruction reported a non-empty status.
    """
    assert self._cachetool_process is not None
    if count == 0:
      return
    assert count <= len(self._in_flight_insts)
    errors = []
    for inst_position in range(count):
      status_len = self._UnpackResult('i')[0]
      if status_len == 0:
        continue  # An empty status means success.
      status = self._UnpackResult('{}s'.format(status_len))[0]
      errors.append((self._in_flight_insts[inst_position], status))
    del self._in_flight_insts[:count]
    if errors:
      raise CacheBackendError(errors)
| 367 | 523 |
def ApplyUrlWhitelistToCacheArchive(cache_archive_path,
                                    whitelisted_urls,
                                    output_cache_archive_path):
  """Generate a new cache archive containing only whitelisted urls.

  Args:
    cache_archive_path: Path of the cache archive to apply the white listing.
    whitelisted_urls: Set of url to keep in cache.
    output_cache_archive_path: Destination path of cache archive containing
      only white-listed urls.
  """
  work_directory = tempfile.mkdtemp(suffix='.cache')
  try:
    UnzipDirectoryContent(cache_archive_path, work_directory)
    with OnlineCacheBackend(
        work_directory, CacheBackendType.Simple) as backend:
      for url in backend.ListKeys():
        if url not in whitelisted_urls:
          backend.DeleteKey(url)
      # Sanity check: every surviving key must be white-listed.
      for url in backend.ListKeys():
        assert url in whitelisted_urls
    ZipDirectoryContent(work_directory, output_cache_archive_path)
  finally:
    shutil.rmtree(work_directory)
| 392 | 549 |
| 393 | 550 |
| 394 def ManualTestMain(): | 551 def ManualTestMain(): |
| 395 import argparse | 552 import argparse |
| 396 parser = argparse.ArgumentParser(description='Tests cache back-end.') | 553 parser = argparse.ArgumentParser(description='Tests cache back-end.') |
| 397 parser.add_argument('cache_archive_path', type=str) | 554 parser.add_argument('cache_archive_path', type=str) |
| 398 parser.add_argument('backend_type', type=str, choices=BACKEND_TYPES) | 555 parser.add_argument('backend_type', type=str, choices=BACKEND_TYPES) |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 414 print '{}\'s HTTP response header:'.format(selected_key) | 571 print '{}\'s HTTP response header:'.format(selected_key) |
| 415 print cache_backend.GetStreamForKey(selected_key, 0) | 572 print cache_backend.GetStreamForKey(selected_key, 0) |
| 416 print cache_backend.GetDecodedContentForKey(selected_key) | 573 print cache_backend.GetDecodedContentForKey(selected_key) |
| 417 cache_backend.DeleteKey(keys[1]) | 574 cache_backend.DeleteKey(keys[1]) |
| 418 assert keys[1] not in cache_backend.ListKeys() | 575 assert keys[1] not in cache_backend.ListKeys() |
| 419 shutil.rmtree(cache_path) | 576 shutil.rmtree(cache_path) |
| 420 | 577 |
| 421 | 578 |
# Manual smoke-test entry point (see ManualTestMain above).
if __name__ == '__main__':
  ManualTestMain()
| OLD | NEW |