Chromium Code Reviews

Unified diff: tools/android/loading/chrome_cache.py

Issue 2112013003: sandwich: Use cachetool's batch mode to speed-up cache processing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@af00
Patch Set: Created 4 years, 5 months ago

 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Takes care of manipulating the chrome's HTTP cache.
 """

 from datetime import datetime
 import json
 import os
 import re
 import shutil
+import struct
 import subprocess
 import sys
 import tempfile
 import zipfile

 _SRC_DIR = os.path.abspath(os.path.join(
     os.path.dirname(__file__), '..', '..', '..'))

 sys.path.append(os.path.join(_SRC_DIR, 'build', 'android'))
 from pylib import constants

 import device_setup
 import options


 OPTIONS = options.OPTIONS


+class CacheBackendType(object):
+  Simple = 'simple'
mattcary 2016/07/01 12:09:29 Constants should be all caps: SIMPLE, BLOCKFILE
gabadie 2016/07/01 14:10:25 Done.
+  Blockfile = 'blockfile'
+
 # Cache back-end types supported by cachetool.
-BACKEND_TYPES = {'simple', 'blockfile'}
+BACKEND_TYPES = {CacheBackendType.Simple, CacheBackendType.Blockfile}

 # Regex used to parse HTTP headers line by line.
 HEADER_PARSING_REGEX = re.compile(r'^(?P<header>\S+):(?P<value>.*)$')


 def _EnsureCleanCacheDirectory(directory_dest_path):
   """Ensure that a cache directory is created and clean.

   Args:
     directory_dest_path: Path of the cache directory to ensure cleanliness.
(...skipping 182 matching lines...)

   Args:
     directory_src_path: Path of the cache directory source.
     directory_dest_path: Path of the cache directory destination.
   """
   assert os.path.isdir(directory_src_path)
   _EnsureCleanCacheDirectory(directory_dest_path)
   shutil.copytree(directory_src_path, directory_dest_path)


+class CacheBackendError(Exception):
+  def __init__(self, errors):
+    Exception.__init__(self, repr(errors))
+    self.errors = errors
+
+
 class CacheBackend(object):
   """Takes care of reading and deleting cached keys.
mattcary 2016/07/01 12:09:29 Add that this can be used as a context manager now
gabadie 2016/07/01 14:10:25 Done.
   """

   def __init__(self, cache_directory_path, cache_backend_type):
     """Chrome cache back-end constructor.

     Args:
       cache_directory_path: The directory path where the cache is locally
           stored.
       cache_backend_type: A cache back-end type in BACKEND_TYPES.
     """
     assert os.path.isdir(cache_directory_path)
     assert cache_backend_type in BACKEND_TYPES
     self._cache_directory_path = cache_directory_path
     self._cache_backend_type = cache_backend_type
     # Make sure cache_directory_path is a valid cache.
-    self._CachetoolCmd('validate')
+    self._CachetoolCmd('stop')

   def GetSize(self):
     """Gets total size of cache entries in bytes."""
     size = self._CachetoolCmd('get_size')
     return int(size.strip())

   def ListKeys(self):
     """Lists cache's keys.

     Returns:
       A list of all keys stored in the cache.
     """
-    return [k.strip() for k in self._CachetoolCmd('list_keys').split('\n')[:-1]]
+    return [k.strip() for k in self._CachetoolCmd('list_keys').split('\n')[:-2]]
mattcary 2016/07/01 12:09:29 This -2 is a magic constant that's a little worrisome…
gabadie 2016/07/01 14:10:26 Oh this change is because of an impl change in cachetool…

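An aside on the [:-2] slice discussed in the thread above: slicing a fixed number of trailing entries off cachetool's output couples the parser to the tool's exact trailer format. A minimal defensive alternative (a hypothetical helper, not part of this patch), assuming keys never consist solely of whitespace, is to keep only non-empty lines:

def _ParseListKeysOutput(list_keys_stdout):
  # Drop blank lines instead of slicing a fixed number of trailer lines.
  return [line.strip() for line in list_keys_stdout.splitlines()
          if line.strip()]
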
   def GetStreamForKey(self, key, index):
     """Gets a key's stream.

     Args:
       key: The key to access the stream.
       index: The stream index:
           index=0 is the HTTP response header;
           index=1 is the transport encoded content;
           index=2 is the compiled content.
(...skipping 24 matching lines...)
     """Runs the cache editor tool and return the stdout.

     Args:
       operation: Cachetool operation.
       args: Additional operation argument to append to the command line.
       stdin: String to pipe to the Cachetool's stdin.

     Returns:
       Cachetool's stdout string.
     """
+    args = args or []
     editor_tool_cmd = [
         OPTIONS.LocalBinary('cachetool'),
         self._cache_directory_path,
         self._cache_backend_type,
-        operation]
-    editor_tool_cmd.extend(args or [])
-    process = subprocess.Popen(
-        editor_tool_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
-    stdout_data, _ = process.communicate(input=stdin)
-    assert process.returncode == 0
+        operation] + args
+    process = subprocess.Popen(editor_tool_cmd, stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+    stdout_data, stderr_data = process.communicate(input=stdin)
+    if process.returncode != 0:
+      raise CacheBackendError([([operation] + args, stderr_data.strip())])
     return stdout_data

   def UpdateRawResponseHeaders(self, key, raw_headers):
     """Updates a key's raw response headers.

     Args:
       key: The key to modify.
       raw_headers: Raw response headers to set.
     """
     self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers)
(...skipping 27 matching lines...)

     cmd = [OPTIONS.LocalBinary('content_decoder_tool')]
     cmd.extend([s.strip() for s in content_encoding.split(',')])
     process = subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
     decoded_content, _ = process.communicate(input=encoded_content)
     assert process.returncode == 0
     return decoded_content

+  def __enter__(self):
+    return self
+
+  def __exit__(self, exc_type, exc_val, exc_tb):
+    del exc_type, exc_val, exc_tb
mattcary 2016/07/01 12:09:29 Shouldn't you return True if you're going to suppress exceptions?
gabadie 2016/07/01 14:10:25 return False (the implicit return None was already equivalent).

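For reference on the __exit__ exchange above: Python suppresses an in-flight exception only when __exit__ returns a truthy value; returning False, or falling through and implicitly returning None, lets it propagate. A minimal standalone sketch of both behaviours:

class _PropagatingContext(object):
  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    del exc_type, exc_val, exc_tb  # Unused.
    return False  # Same effect as the implicit None: exceptions propagate.


class _SuppressingContext(object):
  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    # Returning True tells the interpreter the exception has been handled.
    return exc_type is not None and issubclass(exc_type, ValueError)

A ValueError raised inside a with _SuppressingContext() block is swallowed, while the same raise inside a with _PropagatingContext() block escapes to the caller.
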
+class OnlineCacheBackend(object):
mattcary 2016/07/01 12:09:29 Class comment, including context manager semantics
gabadie 2016/07/01 14:10:26 Done.
+  _INST_IDS = {
+      'stop': 0,
+      'get_size': 1,
+      'list_keys': 2,
+      'get_stream_for_key': 3,
+      'delete_stream': 4,
+      'delete_key': 5,
+      'update_raw_headers': 6
+  }
+
+  def __init__(self, cache_directory_path, cache_backend_type, auto_sync=False):
+    assert os.path.isdir(cache_directory_path)
+    assert cache_backend_type in BACKEND_TYPES
+    self._cache_directory_path = cache_directory_path
+    self._cache_backend_type = cache_backend_type
+    self._in_flight_insts = []
+    self._cachetool_process = None
+    self._cachetool_stdin = None
+    self._cachetool_stdout = None
+    self._auto_sync = auto_sync
+
+  def __enter__(self):
+    self.Start()
+    return self
+
+  def __exit__(self, exc_type, exc_val, exc_tb):
+    del exc_val, exc_tb
+    self.Stop(force_stop=exc_type == CacheBackendError)
mattcary 2016/07/01 12:09:29 return True?
gabadie 2016/07/01 14:10:25 Not really. The point here is to just stop cachetool…
mattcary 2016/07/01 14:30:21 I think prefixing the variables with an underscore…
gabadie 2016/07/01 14:35:39 The del ... # unused. is a common pattern in chromium.
mattcary 2016/07/01 20:24:24 Fair enough, thanks.
+
+  def GetSize(self):
+    self._PushInsts('get_size')
+    self.Sync()
+    return self._UnpackResult('i')[0]
+
+  def ListKeys(self):
+    self._PushInsts('list_keys')
+    self.Sync()
+    keys = []
+    while True:
+      key_size = self._UnpackResult('i')[0]
+      if key_size == 0:
+        break
+      keys.append(self._UnpackResult('{}s'.format(key_size))[0])
+    return keys
+
+  def GetStreamForKey(self, key, index):
+    self._PushInsts('get_stream_for_key', str(key), index)
+    self.Sync()
+    stream_size = self._UnpackResult('i')[0]
+    return self._UnpackResult('{}s'.format(stream_size))[0]
+
+  def DeleteStreamForKey(self, key, index):
+    self._PushInsts('delete_stream', str(key), index)
+
+  def DeleteKey(self, key):
+    self._PushInsts('delete_key', str(key))
+
+  def UpdateRawResponseHeaders(self, key, raw_headers):
+    self._PushInsts('update_raw_headers', str(key), raw_headers)
+
+  def Start(self):
+    assert self._cachetool_process == None
+    stdin = os.pipe()
+    stdout = os.pipe()
+    cache_tool_cmd = [
+        OPTIONS.LocalBinary('cachetool'),
+        self._cache_directory_path,
+        self._cache_backend_type,
+        'online']
+    self._cachetool_process = subprocess.Popen(
+        cache_tool_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+    os.close(stdin[0])
+    self._cachetool_stdin = stdin[1]
+    self._cachetool_stdout = stdout[0]
+    os.close(stdout[1])
+    assert not self._in_flight_insts
+
+  def Stop(self, force_stop=False):
+    assert self._cachetool_process != None
+    if force_stop:
+      self._cachetool_process.kill()
+      self._cachetool_process.wait()
+      del self._in_flight_insts[:]
+    else:
+      self._PushInsts('stop')
+      self.Sync()
+      self._cachetool_process.wait()
+      assert not self._in_flight_insts
+      assert self._cachetool_process.returncode == 0
+    os.close(self._cachetool_stdin)
+    os.close(self._cachetool_stdout)
+    assert len(self._in_flight_insts) == 0
+    self._cachetool_process = None
+
+  def Sync(self):
+    self._PullInstsResults(len(self._in_flight_insts))
+
+  def _PushInsts(self, inst_name, *args):
+    assert self._cachetool_process != None
+    inst_id = self._INST_IDS[inst_name]
+    inst_code = struct.pack('b', inst_id)
+    for param in args:
+      if type(param) == int:
+        inst_code += struct.pack('i', param)
+      elif type(param) == str:
+        inst_code += struct.pack('i{}s'.format(len(param)), len(param), param)
+      else:
+        assert False, 'Couldn\'t passdown parameter: {}'.format(repr(param))
+    self._cachetool_process.stdin.write(inst_code)
mattcary 2016/07/01 12:09:29 This is dangerous. If the subprocess stdout buffer fills up…
gabadie 2016/07/01 14:10:25 Good catch, I always forgot that python buffer stuff…
mattcary 2016/07/01 14:30:21 Don't system pipes have a finite buffer as well?
gabadie 2016/07/01 14:35:39 Of course they do, but using os.write() allows me to…
pasko 2016/07/04 16:53:52 just noticed this discussion. Can you explain more?
+    self._cachetool_process.stdin.flush()
+    self._in_flight_insts.append([inst_name] + list(args))
+    if self._auto_sync:
+      assert len(self._in_flight_insts) == 1
+      self.Sync()
+
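On the buffering concern raised against _PushInsts above: the generic hazard when driving a child process over two pipes is a deadlock where the parent blocks writing to the child's stdin while the child blocks writing replies to a stdout pipe nobody is draining. The sketch below is not this patch's protocol; it is a hypothetical line-oriented round-trip (one reply line per request over a subprocess.Popen with stdin/stdout pipes) that shows the usual mitigation of bounding how many requests are in flight before reading replies back:

def _BoundedRoundTrip(process, request_lines, max_in_flight=64):
  # Caps the number of unanswered requests so the child's stdout buffer
  # cannot fill up while the parent is still blocked writing to stdin.
  replies = []
  pending = 0
  for line in request_lines:
    process.stdin.write(line + '\n')
    process.stdin.flush()
    pending += 1
    if pending == max_in_flight:
      for _ in xrange(pending):
        replies.append(process.stdout.readline())
      pending = 0
  for _ in xrange(pending):
    replies.append(process.stdout.readline())
  return replies
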
+  def _UnpackResult(self, fmt):
+    buf_size = struct.calcsize(fmt)
+    return struct.unpack(fmt, self._cachetool_process.stdout.read(buf_size))
+
+  def _PullInstsResults(self, count):
+    assert self._cachetool_process != None
+    if count == 0:
+      return
+    assert count <= len(self._in_flight_insts)
+    errors = []
+    for inst_position in xrange(count):
+      status_len = self._UnpackResult('i')[0]
+      if status_len == 0:
+        # print repr(self._in_flight_insts[inst_position]) + ' OK'
+        continue
+      status = self._UnpackResult('{}s'.format(status_len))[0]
+      errors.append((self._in_flight_insts[inst_position], status))
+    del self._in_flight_insts[:count]
+    if errors:
+      raise CacheBackendError(errors)
+

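To make the binary protocol above concrete: each instruction is one signed opcode byte followed by its parameters, ints packed as native 4-byte values and strings as a length prefix plus the raw bytes. A small illustration (the URL is a placeholder, and a native int is assumed to be 4 bytes) of what _PushInsts('delete_key', 'http://a.com/') writes to cachetool's stdin:

import struct

key = 'http://a.com/'
opcode = 5  # _INST_IDS['delete_key'] in the class above.
wire_bytes = struct.pack('b', opcode) + struct.pack(
    'i{}s'.format(len(key)), len(key), key)
assert len(wire_bytes) == 1 + 4 + len(key)  # opcode + length prefix + key.
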
 def ApplyUrlWhitelistToCacheArchive(cache_archive_path,
                                     whitelisted_urls,
                                     output_cache_archive_path):
   """Generate a new cache archive containing only whitelisted urls.

   Args:
     cache_archive_path: Path of the cache archive to apply the white listing.
     whitelisted_urls: Set of url to keep in cache.
     output_cache_archive_path: Destination path of cache archive containing only
         white-listed urls.
   """
   cache_temp_directory = tempfile.mkdtemp(suffix='.cache')
   try:
     UnzipDirectoryContent(cache_archive_path, cache_temp_directory)
-    backend = CacheBackend(cache_temp_directory, 'simple')
-    cached_urls = backend.ListKeys()
-    for cached_url in cached_urls:
-      if cached_url not in whitelisted_urls:
-        backend.DeleteKey(cached_url)
-    for cached_url in backend.ListKeys():
-      assert cached_url in whitelisted_urls
+    with OnlineCacheBackend(
+        cache_temp_directory, CacheBackendType.Simple) as backend:
+      cached_urls = backend.ListKeys()
+      for cached_url in cached_urls:
+        if cached_url not in whitelisted_urls:
+          backend.DeleteKey(cached_url)
+      for cached_url in backend.ListKeys():
+        assert cached_url in whitelisted_urls
     ZipDirectoryContent(cache_temp_directory, output_cache_archive_path)
   finally:
     shutil.rmtree(cache_temp_directory)

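A usage sketch of the batch mode this patch introduces (the helper name and paths below are placeholders): with the default auto_sync=False, DeleteKey only queues an instruction; Sync, or the implicit stop on context exit, flushes the whole batch through a single cachetool process, and CacheBackendError.errors carries one (instruction, status) pair per failed instruction:

def _PruneCacheDirectory(cache_directory_path, urls_to_drop):
  try:
    with OnlineCacheBackend(
        cache_directory_path, CacheBackendType.Simple) as backend:
      for url in urls_to_drop:
        backend.DeleteKey(url)  # Queued only; nothing is executed yet.
      backend.Sync()  # One batched round-trip instead of one process per key.
      return backend.ListKeys()
  except CacheBackendError as error:
    for instruction, status in error.errors:
      print 'cachetool {} failed: {}'.format(instruction, status)
    raise
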
 def ManualTestMain():
   import argparse
   parser = argparse.ArgumentParser(description='Tests cache back-end.')
   parser.add_argument('cache_archive_path', type=str)
   parser.add_argument('backend_type', type=str, choices=BACKEND_TYPES)
(...skipping 15 matching lines...)
   print '{}\'s HTTP response header:'.format(selected_key)
   print cache_backend.GetStreamForKey(selected_key, 0)
   print cache_backend.GetDecodedContentForKey(selected_key)
   cache_backend.DeleteKey(keys[1])
   assert keys[1] not in cache_backend.ListKeys()
   shutil.rmtree(cache_path)


 if __name__ == '__main__':
   ManualTestMain()