Chromium Code Reviews

Diff: tools/android/loading/sandwich_prefetch.py

Issue 2112483002: sandwich: Fixes two sources of KeyError task failures (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 5 months ago
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Implements a task builder for benchmarking effects of NoState Prefetch.
Noticeable steps of the task pipeline:
  * Save a WPR archive
  * Process the WPR archive to make all resources cacheable
  * Process cache archive to patch response headers back to their original
(...skipping 12 matching lines...)
import shutil
from urlparse import urlparse

import chrome_cache
import common_util
import loading_trace
from prefetch_view import PrefetchSimulationView
from request_dependencies_lens import RequestDependencyLens
import sandwich_metrics
import sandwich_runner
+import sandwich_utils
import task_manager
import wpr_backend


class Discoverer(object):
  # Do not prefetch anything.
  EmptyCache = 'empty-cache'

  # Prefetches everything to load fully from cache (impossible in practice).
  FullCache = 'full-cache'
(...skipping 149 matching lines...)
  logging.info('number of requests discovered by %s: %d',
               subresource_discoverer, len(requests))
  return requests


def _PruneOutOriginalNoStoreRequests(original_headers_path, requests):
  with open(original_headers_path) as file_input:
    original_headers = json.load(file_input)
  pruned_requests = set()
  for request in requests:
-    request_original_headers = original_headers[request.url]
+    url = sandwich_utils.NormalizeUrl(request.url)
+    if url not in original_headers:
+      # TODO(gabadie): Dig why these requests were not in WPR.
pasko 2016/07/04 15:55:19: nit: s/Dig/Investigate/ yeah, that's a bit surprising.
gabadie 2016/07/04 17:03:43: Done.
+      assert request.failed
+      logging.warning(
+          'could not find original headers for: %s (failure: %s)',
+          url, request.error_text)
+      continue
+    request_original_headers = original_headers[url]
    if ('cache-control' in request_original_headers and
        'no-store' in request_original_headers['cache-control'].lower()):
      pruned_requests.add(request)
  return [r for r in requests if r not in pruned_requests]
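For context on the new lookup through sandwich_utils.NormalizeUrl: the KeyError being fixed here was the direct lookup of request.url in original_headers, which could miss either because the URL needed normalizing or because a failed request never made it into the WPR archive. NormalizeUrl itself is defined in sandwich_utils.py (the companion file in this CL) and is not shown in this diff, so the sketch below is a hypothetical stand-in; it assumes normalization amounts to stripping the URL fragment, which is never sent to the server and therefore never appears as a headers key.

    # Hypothetical stand-in for sandwich_utils.NormalizeUrl; assumption only,
    # the real helper lives in sandwich_utils.py.
    from urlparse import urlparse, urlunparse

    def NormalizeUrl(url):
      # Drop the '#fragment' so that 'http://a.test/x#y' and 'http://a.test/x'
      # resolve to the same original-headers key.
      parsed = urlparse(url)
      return urlunparse((parsed.scheme, parsed.netloc, parsed.path,
                         parsed.params, parsed.query, ''))

    # Usage mirroring the patched lookup:
    #   url = NormalizeUrl(request.url)
    #   request_original_headers = original_headers.get(url)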

def _ExtractDiscoverableUrls(
    original_headers_path, loading_trace_path, subresource_discoverer):
  """Extracts discoverable resource urls from a loading trace according to a
  sub-resource discoverer.
(...skipping 259 matching lines...)
  logging.info('loading trace: %s', trace_path)
  trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)

  logging.info('verifying trace: %s', trace_path)
  run_output_verifier.VerifyTrace(trace)

  logging.info('extracting metrics from trace: %s', trace_path)
  served_from_network_bytes = 0
  served_from_cache_bytes = 0
  urls_hitting_network = set()
+  response_sizes = {}
  for request in _FilterOutDataAndIncompleteRequests(
      trace.request_track.GetEvents()):
    # Ignore requests served from the blink's cache.
    if request.served_from_cache:
      continue
    urls_hitting_network.add(request.url)
    if request.from_disk_cache:
-      served_from_cache_bytes += cached_encoded_data_lengths[request.url]
+      if request.url in cached_encoded_data_lengths:
+        response_size = cached_encoded_data_lengths[request.url]
+      else:
+        # Some fat webpages may overflow the Memory cache, and so some
+        # requests might be served from the disk cache a couple of times
+        # per page load.
pasko 2016/07/04 15:55:19: can you spit a log message here? It would be nice …
gabadie 2016/07/04 17:03:43: Done.
+        response_size = response_sizes[request.url]
+      served_from_cache_bytes += response_size
    else:
-      served_from_network_bytes += request.GetEncodedDataLength()
+      response_size = request.GetEncodedDataLength()
+      served_from_network_bytes += response_size
+    response_sizes[request.url] = response_size

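Below is a standalone toy illustration (not part of the patch) of the size-accounting fallback added in this hunk: when a response is served from the disk cache but its URL has no entry in cached_encoded_data_lengths, the size recorded for an earlier occurrence of the same URL is reused. The dictionary contents and URL are invented for the example.

    # Toy data, assumed purely for illustration.
    cached_encoded_data_lengths = {}  # pretend this URL missed cache validation
    requests = [
        # (url, from_disk_cache, encoded_data_length)
        ('https://a.test/app.js', False, 1024),  # first load hits the network
        ('https://a.test/app.js', True, None),   # second load: evicted from the
                                                 # memory cache, served from disk
    ]

    response_sizes = {}
    served_from_cache_bytes = 0
    served_from_network_bytes = 0
    for url, from_disk_cache, encoded_data_length in requests:
      if from_disk_cache:
        if url in cached_encoded_data_lengths:
          response_size = cached_encoded_data_lengths[url]
        else:
          # Fall back to the size recorded for an earlier occurrence of the URL.
          response_size = response_sizes[url]
        served_from_cache_bytes += response_size
      else:
        response_size = encoded_data_length
        served_from_network_bytes += response_size
      response_sizes[url] = response_size

    assert served_from_network_bytes == 1024 and served_from_cache_bytes == 1024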
  # Make sure that requests served from blink's cache have at least one
  # corresponding request that was not served from blink's cache.
  for request in _FilterOutDataAndIncompleteRequests(
      trace.request_track.GetEvents()):
    assert (request.url in urls_hitting_network or
            not request.served_from_cache)

  run_metrics = {
      'url': trace.url,
(...skipping 63 matching lines...)
                       dependencies=[self._common_builder.original_wpr_task])
    def BuildPatchedWpr():
      common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
      shutil.copyfile(
          self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
      wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)

      # Save up original response headers.
      original_response_headers = {e.url: e.GetResponseHeadersDict() \
          for e in wpr_archive.ListUrlEntries()}
+      logging.info('save up response headers for %d resources',
+                   len(original_response_headers))
+      if not original_response_headers:
+        # TODO(gabadie): How is it possible to not even have the main resource
+        # in the WPR archive?
pasko 2016/07/04 15:55:18: please link to the bug (and preferably the comment …)
gabadie 2016/07/04 17:03:43: Done.
+        raise sandwich_utils.SandwichKnownError(
+            'Looks like no resources were recorded in WPR during: {}'.format(
+                self._common_builder.original_wpr_task.name))
      with open(self._original_headers_path, 'w') as file_output:
        json.dump(original_response_headers, file_output)

      # Patch WPR.
      _PatchWpr(wpr_archive)
      wpr_archive.Persist()
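SandwichKnownError is defined in sandwich_utils.py, the companion file in this CL, and is not visible in this diff. A minimal sketch of what such a class could look like, assuming it is simply an exception type that lets the pipeline report a known failure mode instead of an unexplained crash:

    # Hypothetical sketch; the real definition lives in sandwich_utils.py.
    class SandwichKnownError(Exception):
      """Failure mode that is understood, so the task should fail cleanly
      instead of surfacing a raw stack trace."""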
    @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
    def BuildOriginalCache():
      runner = self._common_builder.CreateSandwichRunner()
(...skipping 111 matching lines...)
      run_metrics_list = _ProcessRunOutputDir(
          cache_validation_result, benchmark_setup, RunBenchmark.path)
      with open(ProcessRunOutputDir.path, 'w') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
            sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
        writer.writeheader()
        for trace_metrics in run_metrics_list:
          writer.writerow(trace_metrics)

    self._common_builder.default_final_tasks.append(ProcessRunOutputDir)