Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """ | 5 """ |
| 6 Implements a task builder for benchmarking effects of NoState Prefetch. | 6 Implements a task builder for benchmarking effects of NoState Prefetch. |
| 7 Noticeable steps of the task pipeline: | 7 Noticeable steps of the task pipeline: |
| 8 * Save a WPR archive | 8 * Save a WPR archive |
| 9 * Process the WPR archive to make all resources cacheable | 9 * Process the WPR archive to make all resources cacheable |
| 10 * Process cache archive to patch response headers back to their original | 10 * Process cache archive to patch response headers back to their original |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 23 import shutil | 23 import shutil |
| 24 from urlparse import urlparse | 24 from urlparse import urlparse |
| 25 | 25 |
| 26 import chrome_cache | 26 import chrome_cache |
| 27 import common_util | 27 import common_util |
| 28 import loading_trace | 28 import loading_trace |
| 29 from prefetch_view import PrefetchSimulationView | 29 from prefetch_view import PrefetchSimulationView |
| 30 from request_dependencies_lens import RequestDependencyLens | 30 from request_dependencies_lens import RequestDependencyLens |
| 31 import sandwich_metrics | 31 import sandwich_metrics |
| 32 import sandwich_runner | 32 import sandwich_runner |
| 33 import sandwich_utils | |
| 33 import task_manager | 34 import task_manager |
| 34 import wpr_backend | 35 import wpr_backend |
| 35 | 36 |
| 36 | 37 |
| 37 class Discoverer(object): | 38 class Discoverer(object): |
| 38 # Do not prefetch anything. | 39 # Do not prefetch anything. |
| 39 EmptyCache = 'empty-cache' | 40 EmptyCache = 'empty-cache' |
| 40 | 41 |
| 41 # Prefetches everything to load fully from cache (impossible in practice). | 42 # Prefetches everything to load fully from cache (impossible in practice). |
| 42 FullCache = 'full-cache' | 43 FullCache = 'full-cache' |
| (...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 194 logging.info('number of requests discovered by %s: %d', | 195 logging.info('number of requests discovered by %s: %d', |
| 195 subresource_discoverer, len(requests)) | 196 subresource_discoverer, len(requests)) |
| 196 return requests | 197 return requests |
| 197 | 198 |
| 198 | 199 |
| 199 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): | 200 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): |
| 200 with open(original_headers_path) as file_input: | 201 with open(original_headers_path) as file_input: |
| 201 original_headers = json.load(file_input) | 202 original_headers = json.load(file_input) |
| 202 pruned_requests = set() | 203 pruned_requests = set() |
| 203 for request in requests: | 204 for request in requests: |
| 204 request_original_headers = original_headers[request.url] | 205 url = sandwich_utils.NormalizeUrl(request.url) |
| 206 if url not in original_headers: | |
| 207 # TODO(gabadie): Investigate why these requests were not in WPR. | |
| 208 assert request.failed | |
| 209 logging.warning( | |
| 210 'could not find original headers for: %s (failure: %s)', | |
| 211 url, request.error_text) | |
| 212 continue | |
| 213 request_original_headers = original_headers[url] | |
| 205 if ('cache-control' in request_original_headers and | 214 if ('cache-control' in request_original_headers and |
| 206 'no-store' in request_original_headers['cache-control'].lower()): | 215 'no-store' in request_original_headers['cache-control'].lower()): |
| 207 pruned_requests.add(request) | 216 pruned_requests.add(request) |
| 208 return [r for r in requests if r not in pruned_requests] | 217 return [r for r in requests if r not in pruned_requests] |
| 209 | 218 |
| 210 | 219 |
| 211 def _ExtractDiscoverableUrls( | 220 def _ExtractDiscoverableUrls( |
| 212 original_headers_path, loading_trace_path, subresource_discoverer): | 221 original_headers_path, loading_trace_path, subresource_discoverer): |
| 213 """Extracts discoverable resource urls from a loading trace according to a | 222 """Extracts discoverable resource urls from a loading trace according to a |
| 214 sub-resource discoverer. | 223 sub-resource discoverer. |
| (...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 474 logging.info('loading trace: %s', trace_path) | 483 logging.info('loading trace: %s', trace_path) |
| 475 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 484 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
| 476 | 485 |
| 477 logging.info('verifying trace: %s', trace_path) | 486 logging.info('verifying trace: %s', trace_path) |
| 478 run_output_verifier.VerifyTrace(trace) | 487 run_output_verifier.VerifyTrace(trace) |
| 479 | 488 |
| 480 logging.info('extracting metrics from trace: %s', trace_path) | 489 logging.info('extracting metrics from trace: %s', trace_path) |
| 481 served_from_network_bytes = 0 | 490 served_from_network_bytes = 0 |
| 482 served_from_cache_bytes = 0 | 491 served_from_cache_bytes = 0 |
| 483 urls_hitting_network = set() | 492 urls_hitting_network = set() |
| 493 response_sizes = {} | |
| 484 for request in _FilterOutDataAndIncompleteRequests( | 494 for request in _FilterOutDataAndIncompleteRequests( |
| 485 trace.request_track.GetEvents()): | 495 trace.request_track.GetEvents()): |
| 486 # Ignore requests served from the blink's cache. | 496 # Ignore requests served from the blink's cache. |
| 487 if request.served_from_cache: | 497 if request.served_from_cache: |
| 488 continue | 498 continue |
| 489 urls_hitting_network.add(request.url) | 499 urls_hitting_network.add(request.url) |
| 490 if request.from_disk_cache: | 500 if request.from_disk_cache: |
| 491 served_from_cache_bytes += cached_encoded_data_lengths[request.url] | 501 if request.url in cached_encoded_data_lengths: |
| 502 response_size = cached_encoded_data_lengths[request.url] | |
| 503 else: | |
| 504 # Some fat webpages may overflow the Memory cache, and so some | |
| 505 # requests might be served from disk cache couple of times per page | |
| 506 # load. | |
| 507 logging.warning('Looks like could be served from memory cache: %s', | |
| 508 request.url) | |
| 509 response_size = response_sizes[request.url] | |
| 510 served_from_cache_bytes += response_size | |
| 492 else: | 511 else: |
| 493 served_from_network_bytes += request.GetEncodedDataLength() | 512 response_size = request.GetEncodedDataLength() |
| 513 served_from_network_bytes += response_size | |
| 514 response_sizes[request.url] = response_size | |
| 494 | 515 |
| 495 # Make sure the served from blink's cache requests have at least one | 516 # Make sure the served from blink's cache requests have at least one |
| 496 # corresponding request that was not served from the blink's cache. | 517 # corresponding request that was not served from the blink's cache. |
| 497 for request in _FilterOutDataAndIncompleteRequests( | 518 for request in _FilterOutDataAndIncompleteRequests( |
| 498 trace.request_track.GetEvents()): | 519 trace.request_track.GetEvents()): |
| 499 assert (request.url in urls_hitting_network or | 520 assert (request.url in urls_hitting_network or |
| 500 not request.served_from_cache) | 521 not request.served_from_cache) |
| 501 | 522 |
| 502 run_metrics = { | 523 run_metrics = { |
| 503 'url': trace.url, | 524 'url': trace.url, |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 567 dependencies=[self._common_builder.original_wpr_task]) | 588 dependencies=[self._common_builder.original_wpr_task]) |
| 568 def BuildPatchedWpr(): | 589 def BuildPatchedWpr(): |
| 569 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 590 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
| 570 shutil.copyfile( | 591 shutil.copyfile( |
| 571 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 592 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
| 572 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) | 593 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) |
| 573 | 594 |
| 574 # Save up original response headers. | 595 # Save up original response headers. |
| 575 original_response_headers = {e.url: e.GetResponseHeadersDict() \ | 596 original_response_headers = {e.url: e.GetResponseHeadersDict() \ |
| 576 for e in wpr_archive.ListUrlEntries()} | 597 for e in wpr_archive.ListUrlEntries()} |
| 598 logging.info('save up response headers for %d resources', | |
| 599 len(original_response_headers)) | |
| 600 if not original_response_headers: | |
| 601 # TODO(gabadie): How is it possible to not even have the main resource | |
| 602 # in the WPR archive? crbug.com/623966#c5 | |
|
pasko 2016/07/04 18:05:57: Example URL can be found in: http://crbug.com/623966
gabadie 2016/07/06 08:57:55: Done.
| |
| 603 raise sandwich_utils.SandwichKnownError( | |
| 604 'Looks like no resources were recorded in WPR during: {}'.format( | |
| 605 self._common_builder.original_wpr_task.name)) | |
| 577 with open(self._original_headers_path, 'w') as file_output: | 606 with open(self._original_headers_path, 'w') as file_output: |
| 578 json.dump(original_response_headers, file_output) | 607 json.dump(original_response_headers, file_output) |
| 579 | 608 |
| 580 # Patch WPR. | 609 # Patch WPR. |
| 581 _PatchWpr(wpr_archive) | 610 _PatchWpr(wpr_archive) |
| 582 wpr_archive.Persist() | 611 wpr_archive.Persist() |
| 583 | 612 |
| 584 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 613 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
| 585 def BuildOriginalCache(): | 614 def BuildOriginalCache(): |
| 586 runner = self._common_builder.CreateSandwichRunner() | 615 runner = self._common_builder.CreateSandwichRunner() |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 698 run_metrics_list = _ProcessRunOutputDir( | 727 run_metrics_list = _ProcessRunOutputDir( |
| 699 cache_validation_result, benchmark_setup, RunBenchmark.path) | 728 cache_validation_result, benchmark_setup, RunBenchmark.path) |
| 700 with open(ProcessRunOutputDir.path, 'w') as csv_file: | 729 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
| 701 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 730 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
| 702 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 731 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
| 703 writer.writeheader() | 732 writer.writeheader() |
| 704 for trace_metrics in run_metrics_list: | 733 for trace_metrics in run_metrics_list: |
| 705 writer.writerow(trace_metrics) | 734 writer.writerow(trace_metrics) |
| 706 | 735 |
| 707 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) | 736 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
| OLD | NEW |