Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: tools/android/loading/chrome_cache.py

Issue 2112013003: sandwich: Use cachetool's batch mode to speed-up cache processing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@af00
Patch Set: Makes os.{read,write} non blocking Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tools/android/loading/sandwich_prefetch.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Takes care of manipulating the chrome's HTTP cache. 5 """Takes care of manipulating the chrome's HTTP cache.
6 """ 6 """
7 7
8 from datetime import datetime 8 from datetime import datetime
9 import errno
10 import fcntl
9 import json 11 import json
10 import os 12 import os
11 import re 13 import re
12 import shutil 14 import shutil
15 import struct
13 import subprocess 16 import subprocess
14 import sys 17 import sys
15 import tempfile 18 import tempfile
19 import time
16 import zipfile 20 import zipfile
17 21
18 _SRC_DIR = os.path.abspath(os.path.join( 22 _SRC_DIR = os.path.abspath(os.path.join(
19 os.path.dirname(__file__), '..', '..', '..')) 23 os.path.dirname(__file__), '..', '..', '..'))
20 24
21 sys.path.append(os.path.join(_SRC_DIR, 'build', 'android')) 25 sys.path.append(os.path.join(_SRC_DIR, 'build', 'android'))
22 from pylib import constants 26 from pylib import constants
23 27
24 import device_setup 28 import device_setup
25 import options 29 import options
26 30
27 31
28 OPTIONS = options.OPTIONS 32 OPTIONS = options.OPTIONS
29 33
30 34
35 class CacheBackendType(object):
36 SIMPLE = 'simple'
37 BLOCKFILE = 'blockfile'
38
31 # Cache back-end types supported by cachetool. 39 # Cache back-end types supported by cachetool.
32 BACKEND_TYPES = {'simple', 'blockfile'} 40 BACKEND_TYPES = {CacheBackendType.SIMPLE, CacheBackendType.BLOCKFILE}
33 41
34 # Regex used to parse HTTP headers line by line. 42 # Regex used to parse HTTP headers line by line.
35 HEADER_PARSING_REGEX = re.compile(r'^(?P<header>\S+):(?P<value>.*)$') 43 HEADER_PARSING_REGEX = re.compile(r'^(?P<header>\S+):(?P<value>.*)$')
36 44
37 45
38 def _EnsureCleanCacheDirectory(directory_dest_path): 46 def _EnsureCleanCacheDirectory(directory_dest_path):
39 """Ensure that a cache directory is created and clean. 47 """Ensure that a cache directory is created and clean.
40 48
41 Args: 49 Args:
42 directory_dest_path: Path of the cache directory to ensure cleanliness. 50 directory_dest_path: Path of the cache directory to ensure cleanliness.
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after
225 233
226 Args: 234 Args:
227 directory_src_path: Path of the cache directory source. 235 directory_src_path: Path of the cache directory source.
228 directory_dest_path: Path of the cache directory destination. 236 directory_dest_path: Path of the cache directory destination.
229 """ 237 """
230 assert os.path.isdir(directory_src_path) 238 assert os.path.isdir(directory_src_path)
231 _EnsureCleanCacheDirectory(directory_dest_path) 239 _EnsureCleanCacheDirectory(directory_dest_path)
232 shutil.copytree(directory_src_path, directory_dest_path) 240 shutil.copytree(directory_src_path, directory_dest_path)
233 241
234 242
235 class CacheBackend(object): 243 class CacheBackendError(Exception):
236 """Takes care of reading and deleting cached keys. 244 def __init__(self, errors):
245 Exception.__init__(self, repr(errors))
246 self.errors = errors
247
248
249 class CacheBackendBase(object):
250 """Takes care of reading and deleting cached keys. Can be used as a context
251 manager to be compatible with OnlineCacheBackend.
237 """ 252 """
238 253
239 def __init__(self, cache_directory_path, cache_backend_type): 254 def __init__(self, cache_directory_path, cache_backend_type):
240 """Chrome cache back-end constructor. 255 """Chrome cache back-end constructor.
241 256
242 Args: 257 Args:
243 cache_directory_path: The directory path where the cache is locally 258 cache_directory_path: The directory path where the cache is locally
244 stored. 259 stored.
245 cache_backend_type: A cache back-end type in BACKEND_TYPES. 260 cache_backend_type: A cache back-end type in BACKEND_TYPES.
246 """ 261 """
247 assert os.path.isdir(cache_directory_path) 262 assert os.path.isdir(cache_directory_path)
248 assert cache_backend_type in BACKEND_TYPES 263 assert cache_backend_type in BACKEND_TYPES
249 self._cache_directory_path = cache_directory_path 264 self._cache_directory_path = cache_directory_path
250 self._cache_backend_type = cache_backend_type 265 self._cache_backend_type = cache_backend_type
251 # Make sure cache_directory_path is a valid cache.
252 self._CachetoolCmd('validate')
253 266
254 def GetSize(self): 267 def GetSize(self):
255 """Gets total size of cache entries in bytes.""" 268 """Gets total size of cache entries in bytes."""
256 size = self._CachetoolCmd('get_size') 269 raise NotImplementedError
257 return int(size.strip())
258 270
259 def ListKeys(self): 271 def ListKeys(self):
260 """Lists cache's keys. 272 """Lists cache's keys.
261 273
262 Returns: 274 Returns:
263 A list of all keys stored in the cache. 275 A list of all keys stored in the cache.
264 """ 276 """
265 return [k.strip() for k in self._CachetoolCmd('list_keys').split('\n')[:-1]] 277 raise NotImplementedError
266 278
267 def GetStreamForKey(self, key, index): 279 def GetStreamForKey(self, key, index):
268 """Gets a key's stream. 280 """Gets a key's stream.
269 281
270 Args: 282 Args:
271 key: The key to access the stream. 283 key: The key to access the stream.
272 index: The stream index: 284 index: The stream index:
273 index=0 is the HTTP response header; 285 index=0 is the HTTP response header;
274 index=1 is the transport encoded content; 286 index=1 is the transport encoded content;
275 index=2 is the compiled content. 287 index=2 is the compiled content.
276 288
277 Returns: 289 Returns:
278 String holding stream binary content. 290 String holding stream binary content.
279 """ 291 """
280 return self._CachetoolCmd('get_stream', [key, str(index)]) 292 raise NotImplementedError
281 293
282 def DeleteStreamForKey(self, key, index): 294 def DeleteStreamForKey(self, key, index):
283 """Delete a key's stream. 295 """Delete a key's stream.
284 296
285 Args: 297 Args:
286 key: The key to access the stream. 298 key: The key to access the stream.
287 index: The stream index 299 index: The stream index
288 """ 300 """
289 self._CachetoolCmd('delete_stream', [key, str(index)]) 301 raise NotImplementedError
290 302
291 def DeleteKey(self, key): 303 def DeleteKey(self, key):
292 """Deletes a key from the cache. 304 """Deletes a key from the cache.
293 305
294 Args: 306 Args:
295 key: The key to delete. 307 key: The key to delete.
296 """ 308 """
297 self._CachetoolCmd('delete_key', [key]) 309 raise NotImplementedError
298
299 def _CachetoolCmd(self, operation, args=None, stdin=''):
300 """Runs the cache editor tool and return the stdout.
301
302 Args:
303 operation: Cachetool operation.
304 args: Additional operation argument to append to the command line.
305 stdin: String to pipe to the Cachetool's stdin.
306
307 Returns:
308 Cachetool's stdout string.
309 """
310 editor_tool_cmd = [
311 OPTIONS.LocalBinary('cachetool'),
312 self._cache_directory_path,
313 self._cache_backend_type,
314 operation]
315 editor_tool_cmd.extend(args or [])
316 process = subprocess.Popen(
317 editor_tool_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
318 stdout_data, _ = process.communicate(input=stdin)
319 assert process.returncode == 0
320 return stdout_data
321 310
322 def UpdateRawResponseHeaders(self, key, raw_headers): 311 def UpdateRawResponseHeaders(self, key, raw_headers):
323 """Updates a key's raw response headers. 312 """Updates a key's raw response headers.
324 313
325 Args: 314 Args:
326 key: The key to modify. 315 key: The key to modify.
327 raw_headers: Raw response headers to set. 316 raw_headers: Raw response headers to set.
328 """ 317 """
329 self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers) 318 self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers)
330 319
(...skipping 26 matching lines...) Expand all
357 346
358 cmd = [OPTIONS.LocalBinary('content_decoder_tool')] 347 cmd = [OPTIONS.LocalBinary('content_decoder_tool')]
359 cmd.extend([s.strip() for s in content_encoding.split(',')]) 348 cmd.extend([s.strip() for s in content_encoding.split(',')])
360 process = subprocess.Popen(cmd, 349 process = subprocess.Popen(cmd,
361 stdin=subprocess.PIPE, 350 stdin=subprocess.PIPE,
362 stdout=subprocess.PIPE) 351 stdout=subprocess.PIPE)
363 decoded_content, _ = process.communicate(input=encoded_content) 352 decoded_content, _ = process.communicate(input=encoded_content)
364 assert process.returncode == 0 353 assert process.returncode == 0
365 return decoded_content 354 return decoded_content
366 355
356 def Sync(self):
357 """No-op for compatibility with OnlineCacheBackend."""
358 pass
359
360 def __enter__(self):
361 return self
362
363 def __exit__(self, exc_type, exc_val, exc_tb):
364 del exc_type, exc_val, exc_tb # unused.
365 return False
366
367
368 class CacheBackend(CacheBackendBase):
369 """Takes care of manipulating cache directories. Can be used as a context
370 manager to be seamlessly compatible with OnlineCacheBackend.
371
372 Each method issues a command line invocation of cachetool.
373 """
374
375 def __init__(self, cache_directory_path, cache_backend_type):
376 """Chrome cache back-end constructor.
377
378 Args:
379 cache_directory_path: As in for CacheBackendBase.__init__
380 cache_backend_type: As in for CacheBackendBase.__init__
381 """
382 CacheBackendBase.__init__(self, cache_directory_path, cache_backend_type)
383 # Make sure cache_directory_path is a valid cache.
384 self._CachetoolCmd('stop')
385
386 def GetSize(self):
387 """Implements CacheBackendBase.GetSize()."""
388 size = self._CachetoolCmd('get_size')
389 return int(size.strip())
390
391 def ListKeys(self):
392 """Implements CacheBackendBase.ListKeys()."""
393 out_lines = self._CachetoolCmd('list_keys').split('\n')
394 # cachetool finishes the list of keys with '\n\n'.
395 assert out_lines[-2:] == ['', '']
396 return [k.strip() for k in out_lines[:-2]]
397
398 def GetStreamForKey(self, key, index):
399 """Implements CacheBackendBase.GetStreamForKey()."""
400 return self._CachetoolCmd('get_stream', [key, str(index)])
401
402 def DeleteStreamForKey(self, key, index):
403 """Implements CacheBackendBase.DeleteStreamForKey()."""
404 self._CachetoolCmd('delete_stream', [key, str(index)])
405
406 def DeleteKey(self, key):
407 """Implements CacheBackendBase.DeleteKey()."""
408 self._CachetoolCmd('delete_key', [key])
409
410 def UpdateRawResponseHeaders(self, key, raw_headers):
411 """Implements CacheBackendBase.UpdateRawResponseHeaders()."""
412 self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers)
413
414 def _CachetoolCmd(self, operation, args=None, stdin=''):
415 """Runs the cache editor tool and return the stdout.
416
417 Args:
418 operation: Cachetool operation.
419 args: Additional operation argument to append to the command line.
420 stdin: String to pipe to the Cachetool's stdin.
421
422 Returns:
423 Cachetool's stdout string.
424 """
425 args = args or []
426 editor_tool_cmd = [
427 OPTIONS.LocalBinary('cachetool'),
428 self._cache_directory_path,
429 self._cache_backend_type,
430 operation] + args
431 process = subprocess.Popen(editor_tool_cmd, stdout=subprocess.PIPE,
432 stderr=subprocess.PIPE, stdin=subprocess.PIPE)
433 stdout_data, stderr_data = process.communicate(input=stdin)
434 if process.returncode != 0:
435 raise CacheBackendError([([operation] + args, stderr_data.strip())])
436 return stdout_data
437
438
439 class OnlineCacheBackend(CacheBackendBase):
440 """Takes care of manipulating cache directories efficiently using the
441 cachetool's online mode. It must be used as a context manager to spawn a
442 cachetool process ready to process commands issued by the method calls.
443
444 The cachetool commands are by default unsynchronized with the method calls
445 for further speed improvement. However the drawback of this asynchronous
446 behavior is that the methods can raise CacheBackendError caused by previous
447 calls.
448 """
449 _INST_IDS = {
450 'stop': 0,
451 'get_size': 1,
452 'list_keys': 2,
453 'get_stream_for_key': 3,
454 'delete_stream': 4,
455 'delete_key': 5,
456 'update_raw_headers': 6
457 }
458
459 def __init__(self, cache_directory_path, cache_backend_type, auto_sync=False):
460 """Chrome cache back-end constructor.
461
462 Args:
463 cache_directory_path: As in for CacheBackendBase.__init__
464 cache_backend_type: As in for CacheBackendBase.__init__
465 auto_sync: Whether the method calls should be synchronized with cachetool
466 commands (as if Sync() was called automatically in each method).
467 """
468 CacheBackendBase.__init__(self, cache_directory_path, cache_backend_type)
469 self._in_flight_insts = []
470 self._enqueued_compiled_insts = b''
471 self._cachetool_process = None
472 self._cachetool_stdin = None
473 self._cachetool_stdout = None
474 self._auto_sync = auto_sync
475
476 def __enter__(self):
477 self._Start()
478 return self
479
480 def __exit__(self, exc_type, exc_val, exc_tb):
481 del exc_val, exc_tb # unused.
482 self._Stop(force_stop=exc_type == CacheBackendError)
483 return False
484
485 def GetSize(self):
486 """Implements CacheBackendBase.GetSize()."""
487 self._PushInst('get_size')
488 self.Sync()
489 return self._UnpackResult('i')[0]
490
491 def ListKeys(self):
492 """Implements CacheBackendBase.ListKeys()."""
493 self._PushInst('list_keys')
494 self.Sync()
495 keys = []
496 while True:
497 key_size = self._UnpackResult('i')[0]
498 if key_size == 0:
499 break
500 keys.append(self._UnpackResult('{}s'.format(key_size))[0])
501 return keys
502
503 def GetStreamForKey(self, key, index):
504 """Implements CacheBackendBase.GetStreamForKey()."""
505 self._PushInst('get_stream_for_key', str(key), index)
506 self.Sync()
507 stream_size = self._UnpackResult('i')[0]
508 return self._UnpackResult('{}s'.format(stream_size))[0]
509
510 def DeleteStreamForKey(self, key, index):
511 """Implements CacheBackendBase.DeleteStreamForKey()."""
512 self._PushInst('delete_stream', str(key), index)
513
514 def DeleteKey(self, key):
515 """Implements CacheBackendBase.DeleteKey()."""
516 self._PushInst('delete_key', str(key))
517
518 def UpdateRawResponseHeaders(self, key, raw_headers):
519 """Implements CacheBackendBase.UpdateRawResponseHeaders()."""
520 self._PushInst('update_raw_headers', str(key), raw_headers)
521
522 def Sync(self):
523 """Overrides CacheBackendBase.Sync().
524
525 The call of this method synchronizes the current thread with previously
526 issued commands. As a result, this method may raise CacheBackendError caused
527 by previous method calls.
528 """
529 self._PullInstsResults(len(self._in_flight_insts))
530
531 def _Start(self):
532 assert self._cachetool_process == None
533 stdin = os.pipe()
534 stdout = os.pipe()
535 cache_tool_cmd = [
536 OPTIONS.LocalBinary('cachetool'),
537 self._cache_directory_path,
538 self._cache_backend_type,
539 'online']
540 self._cachetool_process = subprocess.Popen(
541 cache_tool_cmd, stdout=stdout[1], stdin=stdin[0])
542 os.close(stdin[0])
543 fcntl.fcntl(stdin[1], fcntl.F_SETFL, os.O_NONBLOCK)
544 fcntl.fcntl(stdout[0], fcntl.F_SETFL, os.O_NONBLOCK)
545 os.close(stdout[1])
546 self._cachetool_stdin = stdin[1]
547 self._cachetool_stdout = stdout[0]
548 assert not self._in_flight_insts
549
550 def _Stop(self, force_stop=False):
551 assert self._cachetool_process != None
552 if force_stop:
553 self._cachetool_process.kill()
554 self._cachetool_process.wait()
555 del self._in_flight_insts[:]
556 self._enqueued_compiled_insts = b''
557 else:
558 self._PushInst('stop')
559 self.Sync()
560 self._cachetool_process.wait()
561 assert not self._in_flight_insts
562 assert self._cachetool_process.returncode == 0
563 os.close(self._cachetool_stdin)
564 os.close(self._cachetool_stdout)
565 assert len(self._in_flight_insts) == 0
566 self._cachetool_process = None
567
568 def _PushInst(self, inst_name, *args):
569 assert self._cachetool_process != None
570 inst_id = self._INST_IDS[inst_name]
571 inst_code = struct.pack('b', inst_id)
572 for param in args:
573 if type(param) == int:
574 inst_code += struct.pack('i', param)
575 elif type(param) == str:
576 inst_code += struct.pack('i{}s'.format(len(param)), len(param), param)
577 else:
578 assert False, 'Couldn\'t passdown parameter: {}'.format(repr(param))
579 self._enqueued_compiled_insts += inst_code
580 self._PushEnqueuedInsts()
581 self._in_flight_insts.append([inst_name] + list(args))
582 if self._auto_sync:
583 assert len(self._in_flight_insts) == 1
584 self.Sync()
585
586 def _PushEnqueuedInsts(self):
587 if not self._enqueued_compiled_insts:
588 return 0
589 pushed_sized = os.write(
590 self._cachetool_stdin, self._enqueued_compiled_insts)
591 self._enqueued_compiled_insts = self._enqueued_compiled_insts[pushed_sized:]
592 return pushed_sized
593
594 def _UnpackResult(self, fmt):
595 buf_size = struct.calcsize(fmt)
596 buf = b''
597 delay_reset = 0.05
598 delay = delay_reset
599 while True:
600 try:
601 buf += os.read(self._cachetool_stdout, buf_size - len(buf))
602 except OSError as error:
603 if error.errno != errno.EAGAIN:
mattcary 2016/07/01 20:24:24 Add comment that EAGAIN is what we get when a read
gabadie 2016/07/04 14:23:16 Done.
pasko 2016/07/04 16:48:23 checking for EAGAIN and not for EWOULDBLOCK? That'
604 raise
605 if len(buf) == buf_size:
606 return struct.unpack(fmt, buf)
607 pushed_sized = self._PushEnqueuedInsts()
608 if pushed_sized == 0:
609 time.sleep(delay)
610 delay = min(1, delay * 2)
611 else:
612 delay = delay_reset
613
614 def _PullInstsResults(self, count):
615 assert self._cachetool_process != None
616 if count == 0:
617 return
618 assert count <= len(self._in_flight_insts)
619 errors = []
620 for inst_position in xrange(count):
621 status_len = self._UnpackResult('i')[0]
622 if status_len == 0:
623 continue
624 status = self._UnpackResult('{}s'.format(status_len))[0]
625 errors.append((self._in_flight_insts[inst_position], status))
626 del self._in_flight_insts[:count]
627 if errors:
628 raise CacheBackendError(errors)
629
367 630
368 def ApplyUrlWhitelistToCacheArchive(cache_archive_path, 631 def ApplyUrlWhitelistToCacheArchive(cache_archive_path,
369 whitelisted_urls, 632 whitelisted_urls,
370 output_cache_archive_path): 633 output_cache_archive_path):
371 """Generate a new cache archive containing only whitelisted urls. 634 """Generate a new cache archive containing only whitelisted urls.
372 635
373 Args: 636 Args:
374 cache_archive_path: Path of the cache archive to apply the white listing. 637 cache_archive_path: Path of the cache archive to apply the white listing.
375 whitelisted_urls: Set of url to keep in cache. 638 whitelisted_urls: Set of url to keep in cache.
376 output_cache_archive_path: Destination path of cache archive containing only 639 output_cache_archive_path: Destination path of cache archive containing only
377 white-listed urls. 640 white-listed urls.
378 """ 641 """
379 cache_temp_directory = tempfile.mkdtemp(suffix='.cache') 642 cache_temp_directory = tempfile.mkdtemp(suffix='.cache')
380 try: 643 try:
381 UnzipDirectoryContent(cache_archive_path, cache_temp_directory) 644 UnzipDirectoryContent(cache_archive_path, cache_temp_directory)
382 backend = CacheBackend(cache_temp_directory, 'simple') 645 with OnlineCacheBackend(
383 cached_urls = backend.ListKeys() 646 cache_temp_directory, CacheBackendType.SIMPLE) as backend:
384 for cached_url in cached_urls: 647 cached_urls = backend.ListKeys()
385 if cached_url not in whitelisted_urls: 648 for cached_url in cached_urls:
386 backend.DeleteKey(cached_url) 649 if cached_url not in whitelisted_urls:
387 for cached_url in backend.ListKeys(): 650 backend.DeleteKey(cached_url)
388 assert cached_url in whitelisted_urls 651 for cached_url in backend.ListKeys():
652 assert cached_url in whitelisted_urls
389 ZipDirectoryContent(cache_temp_directory, output_cache_archive_path) 653 ZipDirectoryContent(cache_temp_directory, output_cache_archive_path)
390 finally: 654 finally:
391 shutil.rmtree(cache_temp_directory) 655 shutil.rmtree(cache_temp_directory)
392 656
393 657
394 def ManualTestMain(): 658 def ManualTestMain():
395 import argparse 659 import argparse
396 parser = argparse.ArgumentParser(description='Tests cache back-end.') 660 parser = argparse.ArgumentParser(description='Tests cache back-end.')
397 parser.add_argument('cache_archive_path', type=str) 661 parser.add_argument('cache_archive_path', type=str)
398 parser.add_argument('backend_type', type=str, choices=BACKEND_TYPES) 662 parser.add_argument('backend_type', type=str, choices=BACKEND_TYPES)
(...skipping 15 matching lines...) Expand all
414 print '{}\'s HTTP response header:'.format(selected_key) 678 print '{}\'s HTTP response header:'.format(selected_key)
415 print cache_backend.GetStreamForKey(selected_key, 0) 679 print cache_backend.GetStreamForKey(selected_key, 0)
416 print cache_backend.GetDecodedContentForKey(selected_key) 680 print cache_backend.GetDecodedContentForKey(selected_key)
417 cache_backend.DeleteKey(keys[1]) 681 cache_backend.DeleteKey(keys[1])
418 assert keys[1] not in cache_backend.ListKeys() 682 assert keys[1] not in cache_backend.ListKeys()
419 shutil.rmtree(cache_path) 683 shutil.rmtree(cache_path)
420 684
421 685
422 if __name__ == '__main__': 686 if __name__ == '__main__':
423 ManualTestMain() 687 ManualTestMain()
OLDNEW
« no previous file with comments | « no previous file | tools/android/loading/sandwich_prefetch.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698