| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """ | 5 """ |
| 6 Implements a task builder for benchmarking effects of NoState Prefetch. | 6 Implements a task builder for benchmarking effects of NoState Prefetch. |
| 7 Noticeable steps of the task pipeline: | 7 Noticeable steps of the task pipeline: |
| 8 * Save a WPR archive | 8 * Save a WPR archive |
| 9 * Process the WPR archive to make all resources cacheable | 9 * Process the WPR archive to make all resources cacheable |
| 10 * Process cache archive to patch response headers back to their original | 10 * Process cache archive to patch response headers back to their original |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 109 | 109 |
| 110 | 110 |
| 111 def _FilterOutDataAndIncompleteRequests(requests): | 111 def _FilterOutDataAndIncompleteRequests(requests): |
| 112 for request in filter(lambda r: not r.IsDataRequest(), requests): | 112 for request in filter(lambda r: not r.IsDataRequest(), requests): |
| 113 # The protocol is only known once the response has been received. But the | 113 # The protocol is only known once the response has been received. But the |
| 114 # trace recording might have been stopped with still some JavaScript | 114 # trace recording might have been stopped with still some JavaScript |
| 115 # originated requests that have not received any responses yet. | 115 # originated requests that have not received any responses yet. |
| 116 if request.protocol is None: | 116 if request.protocol is None: |
| 117 assert not request.HasReceivedResponse() | 117 assert not request.HasReceivedResponse() |
| 118 continue | 118 continue |
| 119 if request.protocol == 'about': | 119 if request.protocol in {'about', 'blob'}: |
| 120 continue | 120 continue |
| 121 if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}: | 121 if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}: |
| 122 raise RuntimeError('Unknown request protocol {}'.format(request.protocol)) | 122 raise RuntimeError('Unknown request protocol {}'.format(request.protocol)) |
| 123 yield request | 123 yield request |
| 124 | 124 |
| 125 | 125 |
def _PatchCacheArchive(cache_archive_path, loading_trace_path,
                       cache_archive_dest_path):
  """Patches the cache archive.

  Note: This method updates the raw response headers of cache entries to
  restore the ones, such as Set-Cookie, that were pruned by the
  net::HttpCacheTransaction, and removes the stream index 2 holding the
  resource's compile meta data.

  Args:
    cache_archive_path: Input archive's path to patch.
    loading_trace_path: Path of the loading trace that has recorded the cache
        archive <cache_archive_path>.
    cache_archive_dest_path: Archive destination's path.
  """
  # Lazy %-style logging args avoid formatting work when INFO is disabled.
  logging.info('loading trace: %s', loading_trace_path)
  trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
    cache_path = os.path.join(tmp_path, 'cache')
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
    cache_backend = chrome_cache.BatchCacheBackend(
        cache_path, chrome_cache.CacheBackendType.SIMPLE)
    cache_entries = set(cache_backend.ListKeys())
    logging.info('Original cache size: %d bytes', cache_backend.GetSize())
    for request in _FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # On requests having an upload data stream such as POST requests,
      # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
      # the upload data stream's session unique identifier.
      #
      # It is fine to not patch these requests since when reopening Chrome,
      # there is no way the entry can be reused since the upload data stream's
      # identifier will be different.
      #
      # The fact that these entries are kept in the cache after closing Chrome
      # properly by closing the Chrome tab as
      # ChromeController.SetSlowDeath() does is a known Chrome bug
      # (crbug.com/610725).
      if request.url not in cache_entries:
        continue
      # Chrome prunes Set-Cookie from response headers before storing them in
      # disk cache. Also, it adds an implicit "Vary: cookie" header to all
      # redirect response headers. Sandwich manages the cache, but between
      # recording the cache and benchmarking, the cookie jar is invalidated.
      # This leads to invalidation of all cacheable redirects.
      raw_headers = request.GetRawResponseHeaders()
      cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
      # NoState-Prefetch would only fetch the resources, but not parse them,
      # hence drop stream index 2 (the compile meta data).
      cache_backend.DeleteStreamForKey(request.url, 2)
    cache_backend.ProcessBatch()
    logging.info('Patched cache size: %d bytes', cache_backend.GetSize())
    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
| 174 | 178 |
| 175 | 179 |
| 176 def _DiscoverRequests(dependencies_lens, subresource_discoverer): | 180 def _DiscoverRequests(dependencies_lens, subresource_discoverer): |
| 177 trace = dependencies_lens.loading_trace | 181 trace = dependencies_lens.loading_trace |
| 178 first_resource_request = trace.request_track.GetFirstResourceRequest() | 182 first_resource_request = trace.request_track.GetFirstResourceRequest() |
| 179 | 183 |
| 180 if subresource_discoverer == Discoverer.EmptyCache: | 184 if subresource_discoverer == Discoverer.EmptyCache: |
| 181 requests = [] | 185 requests = [] |
| 182 elif subresource_discoverer == Discoverer.FullCache: | 186 elif subresource_discoverer == Discoverer.FullCache: |
| 183 requests = dependencies_lens.loading_trace.request_track.GetEvents() | 187 requests = dependencies_lens.loading_trace.request_track.GetEvents() |
| (...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 396 {URL of all requests: encoded_data_length}, | 400 {URL of all requests: encoded_data_length}, |
| 397 'effective_post_requests': [URLs of POST requests], | 401 'effective_post_requests': [URLs of POST requests], |
| 398 'expected_cached_resources': [URLs of resources expected to be cached], | 402 'expected_cached_resources': [URLs of resources expected to be cached], |
| 399 'successfully_cached': [URLs of cached sub-resources] | 403 'successfully_cached': [URLs of cached sub-resources] |
| 400 } | 404 } |
| 401 """ | 405 """ |
| 402 # TODO(gabadie): What's the best way of propagating errors happening in here? | 406 # TODO(gabadie): What's the best way of propagating errors happening in here? |
| 403 logging.info('lists cached urls from %s' % cache_archive_path) | 407 logging.info('lists cached urls from %s' % cache_archive_path) |
| 404 with common_util.TemporaryDirectory() as cache_directory: | 408 with common_util.TemporaryDirectory() as cache_directory: |
| 405 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 409 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| 406 cache_keys = set( | 410 cache_keys = set(chrome_cache.CacheBackend( |
| 407 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) | 411 cache_directory, chrome_cache.CacheBackendType.SIMPLE).ListKeys()) |
| 408 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 412 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
| 409 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 413 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
| 410 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 414 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
| 411 effective_encoded_data_lengths = {} | 415 effective_encoded_data_lengths = {} |
| 412 for request in _FilterOutDataAndIncompleteRequests( | 416 for request in _FilterOutDataAndIncompleteRequests( |
| 413 trace.request_track.GetEvents()): | 417 trace.request_track.GetEvents()): |
| 414 if request.from_disk_cache or request.served_from_cache: | 418 if request.from_disk_cache or request.served_from_cache: |
| 415 # At cache archive creation time, a request might be loaded several times, | 419 # At cache archive creation time, a request might be loaded several times, |
| 416 # but avoid the request.encoded_data_length == 0 if loaded from cache. | 420 # but avoid the request.encoded_data_length == 0 if loaded from cache. |
| 417 continue | 421 continue |
| (...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 698 run_metrics_list = _ProcessRunOutputDir( | 702 run_metrics_list = _ProcessRunOutputDir( |
| 699 cache_validation_result, benchmark_setup, RunBenchmark.path) | 703 cache_validation_result, benchmark_setup, RunBenchmark.path) |
| 700 with open(ProcessRunOutputDir.path, 'w') as csv_file: | 704 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
| 701 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 705 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
| 702 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 706 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
| 703 writer.writeheader() | 707 writer.writeheader() |
| 704 for trace_metrics in run_metrics_list: | 708 for trace_metrics in run_metrics_list: |
| 705 writer.writerow(trace_metrics) | 709 writer.writerow(trace_metrics) |
| 706 | 710 |
| 707 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) | 711 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
| OLD | NEW |