OLD | NEW |
---|---|
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """ | 5 """ |
6 Implements a task builder for benchmarking effects of NoState Prefetch. | 6 Implements a task builder for benchmarking effects of NoState Prefetch. |
7 Notable steps of the task pipeline: | 7 Notable steps of the task pipeline: |
8 * Save a WPR archive | 8 * Save a WPR archive |
9 * Process the WPR archive to make all resources cacheable | 9 * Process the WPR archive to make all resources cacheable |
10 * Process cache archive to patch response headers back to their original | 10 * Process cache archive to patch response headers back to their original |
(...skipping 12 matching lines...) | |
23 import shutil | 23 import shutil |
24 from urlparse import urlparse | 24 from urlparse import urlparse |
25 | 25 |
26 import chrome_cache | 26 import chrome_cache |
27 import common_util | 27 import common_util |
28 import loading_trace | 28 import loading_trace |
29 from prefetch_view import PrefetchSimulationView | 29 from prefetch_view import PrefetchSimulationView |
30 from request_dependencies_lens import RequestDependencyLens | 30 from request_dependencies_lens import RequestDependencyLens |
31 import sandwich_metrics | 31 import sandwich_metrics |
32 import sandwich_runner | 32 import sandwich_runner |
33 import sandwich_utils | |
33 import task_manager | 34 import task_manager |
34 import wpr_backend | 35 import wpr_backend |
35 | 36 |
36 | 37 |
37 class Discoverer(object): | 38 class Discoverer(object): |
38 # Do not prefetch anything. | 39 # Do not prefetch anything. |
39 EmptyCache = 'empty-cache' | 40 EmptyCache = 'empty-cache' |
40 | 41 |
41 # Prefetches everything to load fully from cache (impossible in practice). | 42 # Prefetches everything to load fully from cache (impossible in practice). |
42 FullCache = 'full-cache' | 43 FullCache = 'full-cache' |
(...skipping 151 matching lines...) | |
194 logging.info('number of requests discovered by %s: %d', | 195 logging.info('number of requests discovered by %s: %d', |
195 subresource_discoverer, len(requests)) | 196 subresource_discoverer, len(requests)) |
196 return requests | 197 return requests |
197 | 198 |
198 | 199 |
199 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): | 200 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): |
200 with open(original_headers_path) as file_input: | 201 with open(original_headers_path) as file_input: |
201 original_headers = json.load(file_input) | 202 original_headers = json.load(file_input) |
202 pruned_requests = set() | 203 pruned_requests = set() |
203 for request in requests: | 204 for request in requests: |
204 request_original_headers = original_headers[request.url] | 205 url = sandwich_utils.NormalizeUrl(request.url) |
206 if url not in original_headers: | |
207 # TODO(gabadie): Investigate why these requests were not in WPR. | |
208 assert request.failed | |
209 logging.warning( | |
210 'could not find original headers for: %s (failure: %s)', | |
211 url, request.error_text) | |
212 continue | |
213 request_original_headers = original_headers[url] | |
205 if ('cache-control' in request_original_headers and | 214 if ('cache-control' in request_original_headers and |
206 'no-store' in request_original_headers['cache-control'].lower()): | 215 'no-store' in request_original_headers['cache-control'].lower()): |
207 pruned_requests.add(request) | 216 pruned_requests.add(request) |
208 return [r for r in requests if r not in pruned_requests] | 217 return [r for r in requests if r not in pruned_requests] |
209 | 218 |
210 | 219 |
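For reference, the pruning step above can be read as the self-contained sketch below. `NormalizeUrl` is a stand-in for `sandwich_utils.NormalizeUrl`, assumed here to canonicalize trace URLs (e.g. dropping fragments) so they match the keys saved from the WPR archive; requests whose URL is missing from the saved headers are kept, as in the hunk above.

```python
# Minimal sketch of _PruneOutOriginalNoStoreRequests, assuming request
# objects expose .url and that NormalizeUrl approximates
# sandwich_utils.NormalizeUrl (hypothetical behavior here).
import json
from urlparse import urlparse, urlunparse  # Python 2, as in this module


def NormalizeUrl(url):
  # Hypothetical normalization: drop the fragment so http://a.com/x#frag
  # and http://a.com/x map to the same original-headers key.
  return urlunparse(urlparse(url)._replace(fragment=''))


def PruneOutNoStoreRequests(original_headers_path, requests):
  with open(original_headers_path) as file_input:
    original_headers = json.load(file_input)
  kept = []
  for request in requests:
    headers = original_headers.get(NormalizeUrl(request.url), {})
    # A response originally carrying Cache-Control: no-store must never
    # be served from the cache, so the request is pruned out.
    if 'no-store' not in headers.get('cache-control', '').lower():
      kept.append(request)
  return kept
```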
211 def _ExtractDiscoverableUrls( | 220 def _ExtractDiscoverableUrls( |
212 original_headers_path, loading_trace_path, subresource_discoverer): | 221 original_headers_path, loading_trace_path, subresource_discoverer): |
213 """Extracts discoverable resource urls from a loading trace according to a | 222 """Extracts discoverable resource urls from a loading trace according to a |
214 sub-resource discoverer. | 223 sub-resource discoverer. |
(...skipping 259 matching lines...) | |
474 logging.info('loading trace: %s', trace_path) | 483 logging.info('loading trace: %s', trace_path) |
475 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 484 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
476 | 485 |
477 logging.info('verifying trace: %s', trace_path) | 486 logging.info('verifying trace: %s', trace_path) |
478 run_output_verifier.VerifyTrace(trace) | 487 run_output_verifier.VerifyTrace(trace) |
479 | 488 |
480 logging.info('extracting metrics from trace: %s', trace_path) | 489 logging.info('extracting metrics from trace: %s', trace_path) |
481 served_from_network_bytes = 0 | 490 served_from_network_bytes = 0 |
482 served_from_cache_bytes = 0 | 491 served_from_cache_bytes = 0 |
483 urls_hitting_network = set() | 492 urls_hitting_network = set() |
493 response_sizes = {} | |
484 for request in _FilterOutDataAndIncompleteRequests( | 494 for request in _FilterOutDataAndIncompleteRequests( |
485 trace.request_track.GetEvents()): | 495 trace.request_track.GetEvents()): |
486 # Ignore requests served from Blink's cache. | 496 # Ignore requests served from Blink's cache. |
487 if request.served_from_cache: | 497 if request.served_from_cache: |
488 continue | 498 continue |
489 urls_hitting_network.add(request.url) | 499 urls_hitting_network.add(request.url) |
490 if request.from_disk_cache: | 500 if request.from_disk_cache: |
491 served_from_cache_bytes += cached_encoded_data_lengths[request.url] | 501 if request.url in cached_encoded_data_lengths: |
502 response_size = cached_encoded_data_lengths[request.url] | |
503 else: | |
504 # Some fat webpages may overflow the memory cache, so some requests | |
505 # might be served from the disk cache a couple of times per page | |
506 # load. | |
507 logging.warning('Could have been served from memory cache: %s', | |
508 request.url) | |
509 response_size = response_sizes[request.url] | |
510 served_from_cache_bytes += response_size | |
492 else: | 511 else: |
493 served_from_network_bytes += request.GetEncodedDataLength() | 512 response_size = request.GetEncodedDataLength() |
513 served_from_network_bytes += response_size | |
514 response_sizes[request.url] = response_size | |
494 | 515 |
495 # Make sure requests served from Blink's cache have at least one | 516 # Make sure requests served from Blink's cache have at least one |
496 # corresponding request that was not served from Blink's cache. | 517 # corresponding request that was not served from Blink's cache. |
497 for request in _FilterOutDataAndIncompleteRequests( | 518 for request in _FilterOutDataAndIncompleteRequests( |
498 trace.request_track.GetEvents()): | 519 trace.request_track.GetEvents()): |
499 assert (request.url in urls_hitting_network or | 520 assert (request.url in urls_hitting_network or |
500 not request.served_from_cache) | 521 not request.served_from_cache) |
501 | 522 |
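The byte accounting in this hunk boils down to the pattern sketched below, assuming request objects carry `.url`, `.served_from_cache`, `.from_disk_cache` and `.GetEncodedDataLength()` as the request track events in this file do. Note the sketch defaults to 0 where the module above would raise a KeyError for an unseen URL.

```python
def AccountTransferBytes(requests, cached_encoded_data_lengths):
  """Sketch of the served-from-network/cache byte accounting above."""
  served_from_network_bytes = 0
  served_from_cache_bytes = 0
  response_sizes = {}  # url -> encoded size seen earlier in this trace
  for request in requests:
    if request.served_from_cache:
      continue  # Blink's in-memory cache: no disk or network bytes moved.
    if request.from_disk_cache:
      # Prefer the size recorded when the cache archive was built; if the
      # memory cache overflowed, the same URL may hit the disk cache again
      # later in the load, so fall back to the size seen earlier.
      served_from_cache_bytes += cached_encoded_data_lengths.get(
          request.url, response_sizes.get(request.url, 0))
    else:
      size = request.GetEncodedDataLength()
      served_from_network_bytes += size
      response_sizes[request.url] = size
  return served_from_network_bytes, served_from_cache_bytes
```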
502 run_metrics = { | 523 run_metrics = { |
503 'url': trace.url, | 524 'url': trace.url, |
(...skipping 63 matching lines...) | |
567 dependencies=[self._common_builder.original_wpr_task]) | 588 dependencies=[self._common_builder.original_wpr_task]) |
568 def BuildPatchedWpr(): | 589 def BuildPatchedWpr(): |
569 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 590 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
570 shutil.copyfile( | 591 shutil.copyfile( |
571 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 592 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
572 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) | 593 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) |
573 | 594 |
574 # Save the original response headers. | 595 # Save the original response headers. |
575 original_response_headers = {e.url: e.GetResponseHeadersDict() | 596 original_response_headers = {e.url: e.GetResponseHeadersDict() |
576 for e in wpr_archive.ListUrlEntries()} | 597 for e in wpr_archive.ListUrlEntries()} |
598 logging.info('saving original response headers for %d resources', | |
599 len(original_response_headers)) | |
600 if not original_response_headers: | |
601 # TODO(gabadie): How is it possible to not even have the main resource | |
602 # in the WPR archive? crbug.com/623966#c5 | |
pasko 2016/07/04 18:05:57: Example URL can be found in: http://crbug.com/623966
gabadie 2016/07/06 08:57:55: Done.
| |
603 raise sandwich_utils.SandwichKnownError( | |
604 'Looks like no resources were recorded in WPR during: {}'.format( | |
605 self._common_builder.original_wpr_task.name)) | |
577 with open(self._original_headers_path, 'w') as file_output: | 606 with open(self._original_headers_path, 'w') as file_output: |
578 json.dump(original_response_headers, file_output) | 607 json.dump(original_response_headers, file_output) |
579 | 608 |
580 # Patch WPR. | 609 # Patch WPR. |
581 _PatchWpr(wpr_archive) | 610 _PatchWpr(wpr_archive) |
582 wpr_archive.Persist() | 611 wpr_archive.Persist() |
583 | 612 |
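A condensed sketch of the header snapshot performed by BuildPatchedWpr, assuming a `wpr_backend.WprArchiveBackend`-like object whose `ListUrlEntries()` yields entries with `.url` and `.GetResponseHeadersDict()`; the exception type stands in for `sandwich_utils.SandwichKnownError`.

```python
import json
import logging


def SnapshotOriginalHeaders(wpr_archive, output_path, recording_task_name):
  # Map each archived URL to its original response headers before the
  # archive is patched to make every resource cacheable.
  original_response_headers = {
      entry.url: entry.GetResponseHeadersDict()
      for entry in wpr_archive.ListUrlEntries()}
  logging.info('saving original response headers for %d resources',
               len(original_response_headers))
  if not original_response_headers:
    # An empty archive means the recording step silently failed; fail
    # fast with an actionable message rather than crashing downstream.
    raise RuntimeError(  # stands in for sandwich_utils.SandwichKnownError
        'Looks like no resources were recorded in WPR during: '
        + recording_task_name)
  with open(output_path, 'w') as file_output:
    json.dump(original_response_headers, file_output)
```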
584 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 613 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
585 def BuildOriginalCache(): | 614 def BuildOriginalCache(): |
586 runner = self._common_builder.CreateSandwichRunner() | 615 runner = self._common_builder.CreateSandwichRunner() |
(...skipping 111 matching lines...) | |
698 run_metrics_list = _ProcessRunOutputDir( | 727 run_metrics_list = _ProcessRunOutputDir( |
699 cache_validation_result, benchmark_setup, RunBenchmark.path) | 728 cache_validation_result, benchmark_setup, RunBenchmark.path) |
700 with open(ProcessRunOutputDir.path, 'w') as csv_file: | 729 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
701 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 730 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
702 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 731 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
703 writer.writeheader() | 732 writer.writeheader() |
704 for trace_metrics in run_metrics_list: | 733 for trace_metrics in run_metrics_list: |
705 writer.writerow(trace_metrics) | 734 writer.writerow(trace_metrics) |
706 | 735 |
707 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) | 736 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
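The CSV export at the end of the pipeline is plain `csv.DictWriter` usage; below is a minimal sketch with hypothetical column names (the real ones come from `sandwich_metrics.COMMON_CSV_COLUMN_NAMES` plus the benchmark's additional columns).

```python
import csv


def WriteRunMetricsCsv(csv_path, run_metrics_list, fieldnames):
  # One row per trace; every dict key must appear in fieldnames, or
  # DictWriter raises ValueError.
  with open(csv_path, 'w') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    for trace_metrics in run_metrics_list:
      writer.writerow(trace_metrics)


# Hypothetical usage:
# WriteRunMetricsCsv('run-metrics.csv',
#                    [{'url': 'http://example.com', 'chromium_commit': 'abc'}],
#                    ['url', 'chromium_commit'])
```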