Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(117)

Side by Side Diff: tools/android/loading/sandwich_prefetch.py

Issue 2112483002: sandwich: Fixes two sources of KeyError task failures (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addresses Egor's comments Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """ 5 """
6 Implements a task builder for benchmarking effects of NoState Prefetch. 6 Implements a task builder for benchmarking effects of NoState Prefetch.
7 Noticeable steps of the task pipeline: 7 Noticeable steps of the task pipeline:
8 * Save a WPR archive 8 * Save a WPR archive
9 * Process the WPR archive to make all resources cacheable 9 * Process the WPR archive to make all resources cacheable
10 * Process cache archive to patch response headers back to their original 10 * Process cache archive to patch response headers back to their original
(...skipping 12 matching lines...) Expand all
23 import shutil 23 import shutil
24 from urlparse import urlparse 24 from urlparse import urlparse
25 25
26 import chrome_cache 26 import chrome_cache
27 import common_util 27 import common_util
28 import loading_trace 28 import loading_trace
29 from prefetch_view import PrefetchSimulationView 29 from prefetch_view import PrefetchSimulationView
30 from request_dependencies_lens import RequestDependencyLens 30 from request_dependencies_lens import RequestDependencyLens
31 import sandwich_metrics 31 import sandwich_metrics
32 import sandwich_runner 32 import sandwich_runner
33 import sandwich_utils
33 import task_manager 34 import task_manager
34 import wpr_backend 35 import wpr_backend
35 36
36 37
37 class Discoverer(object): 38 class Discoverer(object):
38 # Do not prefetch anything. 39 # Do not prefetch anything.
39 EmptyCache = 'empty-cache' 40 EmptyCache = 'empty-cache'
40 41
41 # Prefetches everything to load fully from cache (impossible in practice). 42 # Prefetches everything to load fully from cache (impossible in practice).
42 FullCache = 'full-cache' 43 FullCache = 'full-cache'
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 logging.info('number of requests discovered by %s: %d', 195 logging.info('number of requests discovered by %s: %d',
195 subresource_discoverer, len(requests)) 196 subresource_discoverer, len(requests))
196 return requests 197 return requests
197 198
198 199
def _PruneOutOriginalNoStoreRequests(original_headers_path, requests):
  """Drops requests whose original response carried 'Cache-Control: no-store'.

  Args:
    original_headers_path: Path of a JSON file mapping normalized URLs to
        their original response-header dicts (as saved by BuildPatchedWpr).
    requests: Iterable of trace request events to filter.

  Returns:
    A list with the requests in their original order, minus those whose
    original headers contained a no-store cache-control directive.
  """
  with open(original_headers_path) as file_input:
    original_headers = json.load(file_input)
  no_store_requests = set()
  for request in requests:
    url = sandwich_utils.NormalizeUrl(request.url)
    if url not in original_headers:
      # TODO(gabadie): Investigate why these requests were not in WPR.
      assert request.failed
      logging.warning(
          'could not find original headers for: %s (failure: %s)',
          url, request.error_text)
      continue
    # Missing cache-control header is treated the same as one without
    # 'no-store': the request is kept.
    cache_control = original_headers[url].get('cache-control', '')
    if 'no-store' in cache_control.lower():
      no_store_requests.add(request)
  return [request for request in requests if request not in no_store_requests]
209 218
210 219
211 def _ExtractDiscoverableUrls( 220 def _ExtractDiscoverableUrls(
212 original_headers_path, loading_trace_path, subresource_discoverer): 221 original_headers_path, loading_trace_path, subresource_discoverer):
213 """Extracts discoverable resource urls from a loading trace according to a 222 """Extracts discoverable resource urls from a loading trace according to a
214 sub-resource discoverer. 223 sub-resource discoverer.
(...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 logging.info('loading trace: %s', trace_path) 483 logging.info('loading trace: %s', trace_path)
475 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) 484 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
476 485
477 logging.info('verifying trace: %s', trace_path) 486 logging.info('verifying trace: %s', trace_path)
478 run_output_verifier.VerifyTrace(trace) 487 run_output_verifier.VerifyTrace(trace)
479 488
480 logging.info('extracting metrics from trace: %s', trace_path) 489 logging.info('extracting metrics from trace: %s', trace_path)
481 served_from_network_bytes = 0 490 served_from_network_bytes = 0
482 served_from_cache_bytes = 0 491 served_from_cache_bytes = 0
483 urls_hitting_network = set() 492 urls_hitting_network = set()
493 response_sizes = {}
484 for request in _FilterOutDataAndIncompleteRequests( 494 for request in _FilterOutDataAndIncompleteRequests(
485 trace.request_track.GetEvents()): 495 trace.request_track.GetEvents()):
486 # Ignore requests served from the blink's cache. 496 # Ignore requests served from the blink's cache.
487 if request.served_from_cache: 497 if request.served_from_cache:
488 continue 498 continue
489 urls_hitting_network.add(request.url) 499 urls_hitting_network.add(request.url)
490 if request.from_disk_cache: 500 if request.from_disk_cache:
491 served_from_cache_bytes += cached_encoded_data_lengths[request.url] 501 if request.url in cached_encoded_data_lengths:
502 response_size = cached_encoded_data_lengths[request.url]
503 else:
504 # Some fat webpages may overflow the Memory cache, and so some
505 # requests might be served from disk cache couple of times per page
506 # load.
507 logging.warning('Looks like could be served from memory cache: %s',
508 request.url)
509 response_size = response_sizes[request.url]
510 served_from_cache_bytes += response_size
492 else: 511 else:
493 served_from_network_bytes += request.GetEncodedDataLength() 512 response_size = request.GetEncodedDataLength()
513 served_from_network_bytes += response_size
514 response_sizes[request.url] = response_size
494 515
495 # Make sure the served from blink's cache requests have at least one 516 # Make sure the served from blink's cache requests have at least one
496 # corresponding request that was not served from the blink's cache. 517 # corresponding request that was not served from the blink's cache.
497 for request in _FilterOutDataAndIncompleteRequests( 518 for request in _FilterOutDataAndIncompleteRequests(
498 trace.request_track.GetEvents()): 519 trace.request_track.GetEvents()):
499 assert (request.url in urls_hitting_network or 520 assert (request.url in urls_hitting_network or
500 not request.served_from_cache) 521 not request.served_from_cache)
501 522
502 run_metrics = { 523 run_metrics = {
503 'url': trace.url, 524 'url': trace.url,
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
567 dependencies=[self._common_builder.original_wpr_task]) 588 dependencies=[self._common_builder.original_wpr_task])
    def BuildPatchedWpr():
      """Task: copies the original WPR archive and patches it for caching.

      Before patching, each recorded resource's original response headers are
      dumped as JSON to self._original_headers_path so that later steps can
      consult them (e.g. to prune out originally no-store resources).

      Raises:
        sandwich_utils.SandwichKnownError: if the WPR archive contains no
            recorded resources at all.
      """
      common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
      # Work on a copy so the original WPR archive stays untouched.
      shutil.copyfile(
          self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
      wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)

      # Save up original response headers.
      original_response_headers = {e.url: e.GetResponseHeadersDict() \
          for e in wpr_archive.ListUrlEntries()}
      logging.info('save up response headers for %d resources',
                   len(original_response_headers))
      if not original_response_headers:
        # TODO(gabadie): How is it possible to not even have the main resource
        # in the WPR archive? crbug.com/623966#c5
        raise sandwich_utils.SandwichKnownError(
            'Looks like no resources were recorded in WPR during: {}'.format(
                self._common_builder.original_wpr_task.name))
      with open(self._original_headers_path, 'w') as file_output:
        json.dump(original_response_headers, file_output)

      # Patch WPR.
      _PatchWpr(wpr_archive)
      wpr_archive.Persist()
584 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) 613 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
585 def BuildOriginalCache(): 614 def BuildOriginalCache():
586 runner = self._common_builder.CreateSandwichRunner() 615 runner = self._common_builder.CreateSandwichRunner()
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
698 run_metrics_list = _ProcessRunOutputDir( 727 run_metrics_list = _ProcessRunOutputDir(
699 cache_validation_result, benchmark_setup, RunBenchmark.path) 728 cache_validation_result, benchmark_setup, RunBenchmark.path)
700 with open(ProcessRunOutputDir.path, 'w') as csv_file: 729 with open(ProcessRunOutputDir.path, 'w') as csv_file:
701 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + 730 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
702 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) 731 sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
703 writer.writeheader() 732 writer.writeheader()
704 for trace_metrics in run_metrics_list: 733 for trace_metrics in run_metrics_list:
705 writer.writerow(trace_metrics) 734 writer.writerow(trace_metrics)
706 735
707 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) 736 self._common_builder.default_final_tasks.append(ProcessRunOutputDir)
OLDNEW
« no previous file with comments | « no previous file | tools/android/loading/sandwich_utils.py » ('j') | tools/android/loading/sandwich_utils.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698