Chromium Code Reviews

Unified Diff: tools/android/loading/chrome_cache.py

Issue 2112013003: sandwich: Use cachetool's batch mode to speed-up cache processing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@af00
Patch Set: s/Online/Batch (created 4 years, 5 months ago)
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Takes care of manipulating Chrome's HTTP cache.
 """

 from datetime import datetime
+import errno
+import fcntl
 import json
 import os
 import re
 import shutil
+import struct
 import subprocess
 import sys
 import tempfile
+import time
 import zipfile

 _SRC_DIR = os.path.abspath(os.path.join(
     os.path.dirname(__file__), '..', '..', '..'))

 sys.path.append(os.path.join(_SRC_DIR, 'build', 'android'))
 from pylib import constants

 import device_setup
 import options


 OPTIONS = options.OPTIONS


+class CacheBackendType(object):
+  SIMPLE = 'simple'
+  BLOCKFILE = 'blockfile'
+
 # Cache back-end types supported by cachetool.
-BACKEND_TYPES = {'simple', 'blockfile'}
+BACKEND_TYPES = {CacheBackendType.SIMPLE, CacheBackendType.BLOCKFILE}

 # Regex used to parse HTTP headers line by line.
 HEADER_PARSING_REGEX = re.compile(r'^(?P<header>\S+):(?P<value>.*)$')

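For reference, HEADER_PARSING_REGEX splits one raw header line into a name and a value; the captured value keeps any leading whitespace because the pattern does not strip it. A quick illustration, not part of the patch:

  match = HEADER_PARSING_REGEX.match('Content-Encoding: gzip')
  assert match.group('header') == 'Content-Encoding'
  assert match.group('value') == ' gzip'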

 def _EnsureCleanCacheDirectory(directory_dest_path):
   """Ensure that a cache directory is created and clean.

   Args:
     directory_dest_path: Path of the cache directory to ensure cleanliness.

(...skipping 182 matching lines...)

   Args:
     directory_src_path: Path of the cache directory source.
     directory_dest_path: Path of the cache directory destination.
   """
   assert os.path.isdir(directory_src_path)
   _EnsureCleanCacheDirectory(directory_dest_path)
   shutil.copytree(directory_src_path, directory_dest_path)


-class CacheBackend(object):
-  """Takes care of reading and deleting cached keys.
-  """
+class CacheBackendError(Exception):
+  def __init__(self, errors):
+    Exception.__init__(self, repr(errors))
+    self.errors = errors
+
+
+class CacheBackendBase(object):
+  """Takes care of reading and deleting cached keys."""

   def __init__(self, cache_directory_path, cache_backend_type):
     """Chrome cache back-end constructor.

     Args:
       cache_directory_path: The directory path where the cache is locally
           stored.
       cache_backend_type: A cache back-end type in BACKEND_TYPES.
     """
     assert os.path.isdir(cache_directory_path)
     assert cache_backend_type in BACKEND_TYPES
     self._cache_directory_path = cache_directory_path
     self._cache_backend_type = cache_backend_type
-    # Make sure cache_directory_path is a valid cache.
-    self._CachetoolCmd('validate')

   def GetSize(self):
     """Gets total size of cache entries in bytes."""
-    size = self._CachetoolCmd('get_size')
-    return int(size.strip())
+    raise NotImplementedError

   def ListKeys(self):
     """Lists cache's keys.

     Returns:
       A list of all keys stored in the cache.
     """
-    return [k.strip() for k in self._CachetoolCmd('list_keys').split('\n')[:-1]]
+    raise NotImplementedError

   def GetStreamForKey(self, key, index):
     """Gets a key's stream.

     Args:
       key: The key to access the stream.
       index: The stream index:
           index=0 is the HTTP response header;
           index=1 is the transport encoded content;
           index=2 is the compiled content.

     Returns:
       String holding stream binary content.
     """
-    return self._CachetoolCmd('get_stream', [key, str(index)])
+    raise NotImplementedError

   def DeleteStreamForKey(self, key, index):
     """Delete a key's stream.

     Args:
       key: The key to access the stream.
       index: The stream index
     """
-    self._CachetoolCmd('delete_stream', [key, str(index)])
+    raise NotImplementedError

   def DeleteKey(self, key):
     """Deletes a key from the cache.

     Args:
       key: The key to delete.
     """
-    self._CachetoolCmd('delete_key', [key])
-
-  def _CachetoolCmd(self, operation, args=None, stdin=''):
-    """Runs the cache editor tool and return the stdout.
-
-    Args:
-      operation: Cachetool operation.
-      args: Additional operation argument to append to the command line.
-      stdin: String to pipe to the Cachetool's stdin.
-
-    Returns:
-      Cachetool's stdout string.
-    """
-    editor_tool_cmd = [
-        OPTIONS.LocalBinary('cachetool'),
-        self._cache_directory_path,
-        self._cache_backend_type,
-        operation]
-    editor_tool_cmd.extend(args or [])
-    process = subprocess.Popen(
-        editor_tool_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
-    stdout_data, _ = process.communicate(input=stdin)
-    assert process.returncode == 0
-    return stdout_data
+    raise NotImplementedError

   def UpdateRawResponseHeaders(self, key, raw_headers):
     """Updates a key's raw response headers.

     Args:
       key: The key to modify.
       raw_headers: Raw response headers to set.
     """
     self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers)

(...skipping 26 matching lines...)

     cmd = [OPTIONS.LocalBinary('content_decoder_tool')]
     cmd.extend([s.strip() for s in content_encoding.split(',')])
     process = subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
     decoded_content, _ = process.communicate(input=encoded_content)
     assert process.returncode == 0
     return decoded_content

+  def ProcessBatch(self):
+    """No-op for compatibility with BatchCacheBackend."""
+    pass
+
+
+class CacheBackend(CacheBackendBase):
+  """Takes care of manipulating cache directories. Can be used interchangeably
+  with BatchCacheBackend.
+
+  Each method issues a command-line invocation of cachetool.
+  """
+
+  def __init__(self, cache_directory_path, cache_backend_type):
+    """Chrome cache back-end constructor.
+
+    Args:
+      cache_directory_path: As for CacheBackendBase.__init__.
+      cache_backend_type: As for CacheBackendBase.__init__.
+    """
+    CacheBackendBase.__init__(self, cache_directory_path, cache_backend_type)
+    # Make sure cache_directory_path is a valid cache.
+    self._CachetoolCmd('stop')
+
+  def GetSize(self):
+    """Implements CacheBackendBase.GetSize()."""
+    size = self._CachetoolCmd('get_size')
+    return int(size.strip())
+
+  def ListKeys(self):
+    """Implements CacheBackendBase.ListKeys()."""
+    out_lines = self._CachetoolCmd('list_keys').split('\n')
+    # cachetool finishes the list of keys with '\n\n'.
+    assert out_lines[-2:] == ['', '']
+    return [k.strip() for k in out_lines[:-2]]
+
+  def GetStreamForKey(self, key, index):
+    """Implements CacheBackendBase.GetStreamForKey()."""
+    return self._CachetoolCmd('get_stream', [key, str(index)])
+
+  def DeleteStreamForKey(self, key, index):
+    """Implements CacheBackendBase.DeleteStreamForKey()."""
+    self._CachetoolCmd('delete_stream', [key, str(index)])
+
+  def DeleteKey(self, key):
+    """Implements CacheBackendBase.DeleteKey()."""
+    self._CachetoolCmd('delete_key', [key])
+
+  def UpdateRawResponseHeaders(self, key, raw_headers):
+    """Implements CacheBackendBase.UpdateRawResponseHeaders()."""
+    self._CachetoolCmd('update_raw_headers', [key], stdin=raw_headers)
+
+  def _CachetoolCmd(self, operation, args=None, stdin=''):
+    """Runs the cache editor tool and returns its stdout.
+
+    Args:
+      operation: Cachetool operation.
+      args: Additional operation arguments to append to the command line.
+      stdin: String to pipe to cachetool's stdin.
+
+    Returns:
+      Cachetool's stdout string.
+    """
+    args = args or []
+    editor_tool_cmd = [
+        OPTIONS.LocalBinary('cachetool'),
+        self._cache_directory_path,
+        self._cache_backend_type,
+        operation] + args
+    process = subprocess.Popen(editor_tool_cmd, stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+    stdout_data, stderr_data = process.communicate(input=stdin)
+    if process.returncode != 0:
+      raise CacheBackendError([([operation] + args, stderr_data.strip())])
+    return stdout_data
+
+
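Each CacheBackend method above shells out to cachetool once per call, so an operation touching many keys pays one process launch per key. A minimal usage sketch, assuming the cachetool binary has been built and OPTIONS has been parsed; the cache directory path is a placeholder:

  from chrome_cache import CacheBackend, CacheBackendType

  backend = CacheBackend('/tmp/extracted_cache', CacheBackendType.SIMPLE)
  print 'cache size:', backend.GetSize()       # One cachetool invocation.
  for key in backend.ListKeys():               # One more invocation.
    headers = backend.GetStreamForKey(key, 0)  # One invocation per key.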
+class BatchCacheBackend(CacheBackendBase):
+  """Takes care of manipulating cache directories efficiently using
+  cachetool's batch mode.
+  """
+  _INST_IDS = {
+      'stop': 0,
+      'get_size': 1,
+      'list_keys': 2,
+      'get_stream_for_key': 3,
+      'delete_stream': 4,
+      'delete_key': 5,
+      'update_raw_headers': 6
+  }
+
+  def __init__(self, cache_directory_path, cache_backend_type):
+    """Chrome cache back-end constructor.
+
+    Args:
+      cache_directory_path: As for CacheBackendBase.__init__.
+      cache_backend_type: As for CacheBackendBase.__init__.
+    """
+    CacheBackendBase.__init__(self, cache_directory_path, cache_backend_type)
+    self._in_flight_insts = []
+    self._enqueued_compiled_insts = b''
+    self._compiled_results = b''
+    self._compiled_result_cursor = 0
+
+  def GetSize(self):
+    """Implements CacheBackendBase.GetSize()."""
+    self._PushInst('get_size')
+    self.ProcessBatch()
+    return self._UnpackResult('i')[0]
+
+  def ListKeys(self):
+    """Implements CacheBackendBase.ListKeys()."""
+    self._PushInst('list_keys')
+    self.ProcessBatch()
+    keys = []
+    while True:
+      key_size = self._UnpackResult('i')[0]
+      if key_size == 0:
+        break
+      keys.append(self._UnpackResult('{}s'.format(key_size))[0])
+    return keys
+
+  def GetStreamForKey(self, key, index):
+    """Implements CacheBackendBase.GetStreamForKey()."""
+    self._PushInst('get_stream_for_key', str(key), index)
+    self.ProcessBatch()
+    stream_size = self._UnpackResult('i')[0]
+    return self._UnpackResult('{}s'.format(stream_size))[0]
+
+  def DeleteStreamForKey(self, key, index):
+    """Implements CacheBackendBase.DeleteStreamForKey()."""
+    self._PushInst('delete_stream', str(key), index)
+
+  def DeleteKey(self, key):
+    """Implements CacheBackendBase.DeleteKey()."""
+    self._PushInst('delete_key', str(key))
+
+  def UpdateRawResponseHeaders(self, key, raw_headers):
+    """Implements CacheBackendBase.UpdateRawResponseHeaders()."""
+    self._PushInst('update_raw_headers', str(key), raw_headers)
+
+  def ProcessBatch(self):
+    """Overrides CacheBackendBase.ProcessBatch()."""
+    cache_tool_cmd = [
+        OPTIONS.LocalBinary('cachetool'),
+        self._cache_directory_path,
+        self._cache_backend_type,
+        'batch']
+    cachetool_process = subprocess.Popen(
+        cache_tool_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+    self._PushInst('stop')
+    self._compiled_result_cursor = 0
+    self._compiled_results, _ = cachetool_process.communicate(
+        input=self._enqueued_compiled_insts)
+    errors = []
+    for inst in self._in_flight_insts[:-1]:
+      status_len = self._UnpackResult('i')[0]
+      if status_len == 0:
+        continue
+      status = self._UnpackResult('{}s'.format(status_len))[0]
+      errors.append((inst, status))
+    del self._in_flight_insts[:]
+    self._enqueued_compiled_insts = b''
+    if errors:
+      raise CacheBackendError(errors)
+
+  def _PushInst(self, inst_name, *args):
+    inst_id = self._INST_IDS[inst_name]
+    inst_code = struct.pack('b', inst_id)
+    for param in args:
+      if type(param) == int:
+        inst_code += struct.pack('i', param)
+      elif type(param) == str:
+        inst_code += struct.pack('i{}s'.format(len(param)), len(param), param)
+      else:
+        assert False, 'Couldn\'t pass down parameter: {}'.format(repr(param))
+    self._enqueued_compiled_insts += inst_code
+    self._in_flight_insts.append([inst_name] + list(args))
+
+  def _UnpackResult(self, fmt):
+    buf_size = struct.calcsize(fmt)
+    assert (
+        self._compiled_result_cursor + buf_size <= len(self._compiled_results))
+    buf = self._compiled_results[
+        self._compiled_result_cursor:self._compiled_result_cursor + buf_size]
+    self._compiled_result_cursor += buf_size
+    return struct.unpack(fmt, buf)
+
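To make the batch wire format concrete: _PushInst packs a one-byte opcode ('b') followed by each argument, ints as a native 4-byte 'i' and strings as an 'i' length prefix plus the raw bytes, and ProcessBatch pipes the concatenated buffer to `cachetool <cache_dir> <backend> batch` on stdin, appending a trailing 'stop'. The reply is then walked with _UnpackResult: one 'i' status length per queued instruction, zero meaning success, otherwise followed by the error text. A small illustration of that encoding, assuming a typical little-endian host (not part of the patch):

  import struct

  # delete_key is opcode 5 in _INST_IDS; the key is a 13-byte string.
  key = 'http://a.com/'
  inst = struct.pack('b', 5) + struct.pack('i{}s'.format(len(key)), len(key), key)
  # On a little-endian host:
  assert inst == '\x05' + '\x0d\x00\x00\x00' + 'http://a.com/'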

 def ApplyUrlWhitelistToCacheArchive(cache_archive_path,
                                     whitelisted_urls,
                                     output_cache_archive_path):
   """Generates a new cache archive containing only whitelisted URLs.

   Args:
     cache_archive_path: Path of the cache archive to apply the whitelist to.
     whitelisted_urls: Set of URLs to keep in the cache.
     output_cache_archive_path: Destination path of the cache archive containing
         only whitelisted URLs.
   """
   cache_temp_directory = tempfile.mkdtemp(suffix='.cache')
   try:
     UnzipDirectoryContent(cache_archive_path, cache_temp_directory)
-    backend = CacheBackend(cache_temp_directory, 'simple')
+    backend = BatchCacheBackend(
+        cache_temp_directory, CacheBackendType.SIMPLE)
     cached_urls = backend.ListKeys()
     for cached_url in cached_urls:
       if cached_url not in whitelisted_urls:
         backend.DeleteKey(cached_url)
     for cached_url in backend.ListKeys():
       assert cached_url in whitelisted_urls
+    backend.ProcessBatch()
     ZipDirectoryContent(cache_temp_directory, output_cache_archive_path)
   finally:
     shutil.rmtree(cache_temp_directory)


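The whitelisting loop above is where the batch mode pays off: DeleteKey on a BatchCacheBackend only queues an instruction, and all queued work is applied by a single cachetool child process instead of one process per deleted key as with CacheBackend. A condensed sketch of the same pattern, with placeholder paths and URLs:

  whitelisted_urls = set(['http://a.com/main.css'])
  backend = BatchCacheBackend('/tmp/extracted_cache', CacheBackendType.SIMPLE)
  for url in backend.ListKeys():        # ListKeys() flushes its own batch.
    if url not in whitelisted_urls:
      backend.DeleteKey(url)            # Queued only; no subprocess yet.
  try:
    backend.ProcessBatch()              # One cachetool run applies every delete.
  except CacheBackendError as e:
    for failed_inst, message in e.errors:
      print 'cachetool failed:', failed_inst, message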
 def ManualTestMain():
   import argparse
   parser = argparse.ArgumentParser(description='Tests cache back-end.')
   parser.add_argument('cache_archive_path', type=str)
   parser.add_argument('backend_type', type=str, choices=BACKEND_TYPES)

(...skipping 15 matching lines...)

   print '{}\'s HTTP response header:'.format(selected_key)
   print cache_backend.GetStreamForKey(selected_key, 0)
   print cache_backend.GetDecodedContentForKey(selected_key)
   cache_backend.DeleteKey(keys[1])
   assert keys[1] not in cache_backend.ListKeys()
   shutil.rmtree(cache_path)


 if __name__ == '__main__':
   ManualTestMain()