OLD | NEW |
---|---|
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """ | 5 """ |
6 Implements a task builder for benchmarking the effects of NoState Prefetch. | 6 Implements a task builder for benchmarking the effects of NoState Prefetch. |
7 Notable steps of the task pipeline: | 7 Notable steps of the task pipeline: |
8 * Save a WPR archive | 8 * Save a WPR archive |
9 * Process the WPR archive to make all resources cacheable | 9 * Process the WPR archive to make all resources cacheable |
10 * Process cache archive to patch response headers back to their original | 10 * Process cache archive to patch response headers back to their original |
(...skipping 12 matching lines...) | |
23 import shutil | 23 import shutil |
24 from urlparse import urlparse | 24 from urlparse import urlparse |
25 | 25 |
26 import chrome_cache | 26 import chrome_cache |
27 import common_util | 27 import common_util |
28 import loading_trace | 28 import loading_trace |
29 from prefetch_view import PrefetchSimulationView | 29 from prefetch_view import PrefetchSimulationView |
30 from request_dependencies_lens import RequestDependencyLens | 30 from request_dependencies_lens import RequestDependencyLens |
31 import sandwich_metrics | 31 import sandwich_metrics |
32 import sandwich_runner | 32 import sandwich_runner |
33 import sandwich_utils | |
33 import task_manager | 34 import task_manager |
34 import wpr_backend | 35 import wpr_backend |
35 | 36 |
36 | 37 |
37 class Discoverer(object): | 38 class Discoverer(object): |
38 # Do not prefetch anything. | 39 # Do not prefetch anything. |
39 EmptyCache = 'empty-cache' | 40 EmptyCache = 'empty-cache' |
40 | 41 |
41 # Prefetches everything to load fully from cache (impossible in practice). | 42 # Prefetches everything to load fully from cache (impossible in practice). |
42 FullCache = 'full-cache' | 43 FullCache = 'full-cache' |
(...skipping 149 matching lines...) | |
192 logging.info('number of requests discovered by %s: %d', | 193 logging.info('number of requests discovered by %s: %d', |
193 subresource_discoverer, len(requests)) | 194 subresource_discoverer, len(requests)) |
194 return requests | 195 return requests |
195 | 196 |
196 | 197 |
197 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): | 198 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): |
198 with open(original_headers_path) as file_input: | 199 with open(original_headers_path) as file_input: |
199 original_headers = json.load(file_input) | 200 original_headers = json.load(file_input) |
200 pruned_requests = set() | 201 pruned_requests = set() |
201 for request in requests: | 202 for request in requests: |
202 request_original_headers = original_headers[request.url] | 203 url = sandwich_utils.NormalizeUrl(request.url) |
204 if url not in original_headers: | |
205 # TODO(gabadie): Dig why these requests were not in WPR. | |
pasko
2016/07/04 15:55:19
nit: s/Dig/Investigate/
yeah, that's a bit surprising
gabadie
2016/07/04 17:03:43
Done.
| |
206 assert request.failed | |
207 logging.warning( | |
208 'could not find original headers for: %s (failure: %s)', | |
209 url, request.error_text) | |
210 continue | |
211 request_original_headers = original_headers[url] | |
203 if ('cache-control' in request_original_headers and | 212 if ('cache-control' in request_original_headers and |
204 'no-store' in request_original_headers['cache-control'].lower()): | 213 'no-store' in request_original_headers['cache-control'].lower()): |
205 pruned_requests.add(request) | 214 pruned_requests.add(request) |
206 return [r for r in requests if r not in pruned_requests] | 215 return [r for r in requests if r not in pruned_requests] |
207 | 216 |
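sandwich_utils.NormalizeUrl is defined outside this file and its implementation is not visible in this CL; from its use above, it canonicalizes request URLs so they match the keys of the original-headers JSON dumped from the WPR archive. A minimal sketch of what such a helper might look like, assuming the archive keys entries by fragment-less URL (an assumption, not the actual sandwich_utils code):

    import urlparse  # Python 2 module, matching this codebase

    def NormalizeUrl(url):
      # Hypothetical sketch: drop the fragment so the request URL matches
      # the URL the WPR archive recorded for this resource.
      parsed = urlparse.urlparse(url)
      return urlparse.urlunparse(
          (parsed.scheme, parsed.netloc, parsed.path, parsed.params,
           parsed.query, ''))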
208 | 217 |
209 def _ExtractDiscoverableUrls( | 218 def _ExtractDiscoverableUrls( |
210 original_headers_path, loading_trace_path, subresource_discoverer): | 219 original_headers_path, loading_trace_path, subresource_discoverer): |
211 """Extracts discoverable resource urls from a loading trace according to a | 220 """Extracts discoverable resource urls from a loading trace according to a |
212 sub-resource discoverer. | 221 sub-resource discoverer. |
(...skipping 259 matching lines...) | |
472 logging.info('loading trace: %s', trace_path) | 481 logging.info('loading trace: %s', trace_path) |
473 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 482 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
474 | 483 |
475 logging.info('verifying trace: %s', trace_path) | 484 logging.info('verifying trace: %s', trace_path) |
476 run_output_verifier.VerifyTrace(trace) | 485 run_output_verifier.VerifyTrace(trace) |
477 | 486 |
478 logging.info('extracting metrics from trace: %s', trace_path) | 487 logging.info('extracting metrics from trace: %s', trace_path) |
479 served_from_network_bytes = 0 | 488 served_from_network_bytes = 0 |
480 served_from_cache_bytes = 0 | 489 served_from_cache_bytes = 0 |
481 urls_hitting_network = set() | 490 urls_hitting_network = set() |
491 response_sizes = {} | |
482 for request in _FilterOutDataAndIncompleteRequests( | 492 for request in _FilterOutDataAndIncompleteRequests( |
483 trace.request_track.GetEvents()): | 493 trace.request_track.GetEvents()): |
484 # Ignore requests served from Blink's cache. | 494 # Ignore requests served from Blink's cache. |
485 if request.served_from_cache: | 495 if request.served_from_cache: |
486 continue | 496 continue |
487 urls_hitting_network.add(request.url) | 497 urls_hitting_network.add(request.url) |
488 if request.from_disk_cache: | 498 if request.from_disk_cache: |
489 served_from_cache_bytes += cached_encoded_data_lengths[request.url] | 499 if request.url in cached_encoded_data_lengths: |
500 response_size = cached_encoded_data_lengths[request.url] | |
501 else: | |
502 # Some fat web pages may overflow the memory cache, and so some | |
503 # requests might be served from the disk cache a couple of times per | |
504 # page load. | |
pasko
2016/07/04 15:55:19
can you spit a log message here? It would be nice
gabadie
2016/07/04 17:03:43
Done.
| |
505 response_size = response_sizes[request.url] | |
506 served_from_cache_bytes += response_size | |
490 else: | 507 else: |
491 served_from_network_bytes += request.GetEncodedDataLength() | 508 response_size = request.GetEncodedDataLength() |
509 served_from_network_bytes += response_size | |
510 response_sizes[request.url] = response_size | |
492 | 511 |
493 # Make sure requests served from Blink's cache have at least one | 512 # Make sure requests served from Blink's cache have at least one |
494 # corresponding request that was not served from Blink's cache. | 513 # corresponding request that was not served from Blink's cache. |
495 for request in _FilterOutDataAndIncompleteRequests( | 514 for request in _FilterOutDataAndIncompleteRequests( |
496 trace.request_track.GetEvents()): | 515 trace.request_track.GetEvents()): |
497 assert (request.url in urls_hitting_network or | 516 assert (request.url in urls_hitting_network or |
498 not request.served_from_cache) | 517 not request.served_from_cache) |
499 | 518 |
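Distilled, the byte accounting in the loop above works as follows. This sketch reduces request objects to (url, from_disk_cache, encoded_data_length) tuples and assumes requests served from Blink's cache have already been filtered out, as above; it is only an illustration of the logic, not code from this CL:

    def TallyServedBytes(requests, cached_encoded_data_lengths):
      served_from_network_bytes = 0
      served_from_cache_bytes = 0
      response_sizes = {}
      for url, from_disk_cache, encoded_data_length in requests:
        if from_disk_cache:
          if url in cached_encoded_data_lengths:
            size = cached_encoded_data_lengths[url]
          else:
            # Memory cache overflow: this URL was served from the disk
            # cache more than once, so reuse the size recorded for it
            # earlier in the same page load.
            size = response_sizes[url]
          served_from_cache_bytes += size
        else:
          size = encoded_data_length
          served_from_network_bytes += size
        response_sizes[url] = size
      return served_from_network_bytes, served_from_cache_bytes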
500 run_metrics = { | 519 run_metrics = { |
501 'url': trace.url, | 520 'url': trace.url, |
(...skipping 63 matching lines...) | |
565 dependencies=[self._common_builder.original_wpr_task]) | 584 dependencies=[self._common_builder.original_wpr_task]) |
566 def BuildPatchedWpr(): | 585 def BuildPatchedWpr(): |
567 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 586 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
568 shutil.copyfile( | 587 shutil.copyfile( |
569 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 588 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
570 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) | 589 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) |
571 | 590 |
572 # Save the original response headers. | 591 # Save the original response headers. |
573 original_response_headers = {e.url: e.GetResponseHeadersDict() \ | 592 original_response_headers = {e.url: e.GetResponseHeadersDict() \ |
574 for e in wpr_archive.ListUrlEntries()} | 593 for e in wpr_archive.ListUrlEntries()} |
594 logging.info('saving response headers for %d resources', | |
595 len(original_response_headers)) | |
596 if not original_response_headers: | |
597 # TODO(gabadie): How is it possible to not even have the main resource | |
598 # in the WPR archive? | |
pasko
2016/07/04 15:55:18
please link to the bug (and preferably the comment)
gabadie
2016/07/04 17:03:43
Done.
| |
599 raise sandwich_utils.SandwichKnownError( | |
600 'Looks like no resources were recorded in WPR during: {}'.format( | |
601 self._common_builder.original_wpr_task.name)) | |
575 with open(self._original_headers_path, 'w') as file_output: | 602 with open(self._original_headers_path, 'w') as file_output: |
576 json.dump(original_response_headers, file_output) | 603 json.dump(original_response_headers, file_output) |
577 | 604 |
578 # Patch WPR. | 605 # Patch WPR. |
579 _PatchWpr(wpr_archive) | 606 _PatchWpr(wpr_archive) |
580 wpr_archive.Persist() | 607 wpr_archive.Persist() |
581 | 608 |
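sandwich_utils.SandwichKnownError is likewise defined outside this file and not shown in this CL; presumably it is a dedicated exception type that lets the harness report anticipated failure modes, such as an empty WPR recording, distinctly from unexpected crashes. It could be as simple as the following (an assumption, not the actual definition):

    class SandwichKnownError(Exception):
      # Hypothetical sketch: marker type for known, diagnosable failures
      # that should abort the task with a readable message rather than a
      # raw stack trace.
      pass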
582 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 609 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
583 def BuildOriginalCache(): | 610 def BuildOriginalCache(): |
584 runner = self._common_builder.CreateSandwichRunner() | 611 runner = self._common_builder.CreateSandwichRunner() |
(...skipping 111 matching lines...) | |
696 run_metrics_list = _ProcessRunOutputDir( | 723 run_metrics_list = _ProcessRunOutputDir( |
697 cache_validation_result, benchmark_setup, RunBenchmark.path) | 724 cache_validation_result, benchmark_setup, RunBenchmark.path) |
698 with open(ProcessRunOutputDir.path, 'w') as csv_file: | 725 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
699 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 726 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
700 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 727 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
701 writer.writeheader() | 728 writer.writeheader() |
702 for trace_metrics in run_metrics_list: | 729 for trace_metrics in run_metrics_list: |
703 writer.writerow(trace_metrics) | 730 writer.writerow(trace_metrics) |
704 | 731 |
705 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) | 732 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |