| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """ | 5 """ |
| 6 Implements a task builder for benchmarking effects of NoState Prefetch. | 6 Implements a task builder for benchmarking effects of NoState Prefetch. |
| 7 Noticeable steps of the task pipeline: | 7 Noticeable steps of the task pipeline: |
| 8 * Save a WPR archive | 8 * Save a WPR archive |
| 9 * Process the WPR archive to make all resources cacheable | 9 * Process the WPR archive to make all resources cacheable |
| 10 * Process cache archive to patch response headers back to their original | 10 * Process cache archive to patch response headers back to their original |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 109 | 109 |
| 110 | 110 |
| 111 def _FilterOutDataAndIncompleteRequests(requests): | 111 def _FilterOutDataAndIncompleteRequests(requests): |
| 112 for request in filter(lambda r: not r.IsDataRequest(), requests): | 112 for request in filter(lambda r: not r.IsDataRequest(), requests): |
| 113 # The protocol is only known once the response has been received. But the | 113 # The protocol is only known once the response has been received. But the |
| 114 # trace recording might have been stopped with still some JavaScript | 114 # trace recording might have been stopped with still some JavaScript |
| 115 # originated requests that have not received any responses yet. | 115 # originated requests that have not received any responses yet. |
| 116 if request.protocol is None: | 116 if request.protocol is None: |
| 117 assert not request.HasReceivedResponse() | 117 assert not request.HasReceivedResponse() |
| 118 continue | 118 continue |
| 119 if request.protocol == 'about': | 119 if request.protocol in {'about', 'blob'}: |
| 120 continue | 120 continue |
| 121 if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}: | 121 if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}: |
| 122 raise RuntimeError('Unknown request protocol {}'.format(request.protocol)) | 122 raise RuntimeError('Unknown request protocol {}'.format(request.protocol)) |
| 123 yield request | 123 yield request |
| 124 | 124 |
| 125 | 125 |
| 126 def _PatchCacheArchive(cache_archive_path, loading_trace_path, | 126 def _PatchCacheArchive(cache_archive_path, loading_trace_path, |
| 127 cache_archive_dest_path): | 127 cache_archive_dest_path): |
| 128 """Patch the cache archive. | 128 """Patch the cache archive. |
| 129 | 129 |
| 130 Note: This method update the raw response headers of cache entries' to store | 130 Note: This method updates the raw response headers of cache entries to store |
| 131 the ones such as Set-Cookie that were pruned by the | 131 the ones such as Set-Cookie that were pruned by the |
| 132 net::HttpCacheTransaction, and remove the stream index 2 holding resource's | 132 net::HttpCacheTransaction, and remove the stream index 2 holding resource's |
| 133 compile meta data. | 133 compile meta data. |
| 134 | 134 |
| 135 Args: | 135 Args: |
| 136 cache_archive_path: Input archive's path to patch. | 136 cache_archive_path: Input archive's path to patch. |
| 137 loading_trace_path: Path of the loading trace that have recorded the cache | 137 loading_trace_path: Path of the loading trace that has recorded the cache |
| 138 archive <cache_archive_path>. | 138 archive <cache_archive_path>. |
| 139 cache_archive_dest_path: Archive destination's path. | 139 cache_archive_dest_path: Archive destination's path. |
| 140 """ | 140 """ |
| 141 logging.info('loading trace: %s', loading_trace_path) |
| 141 trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path) | 142 trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path) |
| 142 with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path: | 143 with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path: |
| 143 cache_path = os.path.join(tmp_path, 'cache') | 144 cache_path = os.path.join(tmp_path, 'cache') |
| 144 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path) | 145 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path) |
| 145 cache_backend = chrome_cache.CacheBackend(cache_path, 'simple') | 146 with chrome_cache.OnlineCacheBackend( |
| 146 cache_entries = set(cache_backend.ListKeys()) | 147 cache_path, chrome_cache.CacheBackendType.SIMPLE) as cache_backend: |
| 147 logging.info('Original cache size: %d bytes' % cache_backend.GetSize()) | 148 cache_entries = set(cache_backend.ListKeys()) |
| 148 for request in _FilterOutDataAndIncompleteRequests( | 149 logging.info('Original cache size: %d bytes' % cache_backend.GetSize()) |
| 149 trace.request_track.GetEvents()): | 150 for request in _FilterOutDataAndIncompleteRequests( |
| 150 # On requests having an upload data stream such as POST requests, | 151 trace.request_track.GetEvents()): |
| 151 # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with | 152 # On requests having an upload data stream such as POST requests, |
| 152 # the upload data stream's session unique identifier. | 153 # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with |
| 153 # | 154 # the upload data stream's session unique identifier. |
| 154 # It is fine to not patch these requests since when reopening Chrome, | 155 # |
| 155 # there is no way the entry can be reused since the upload data stream's | 156 # It is fine to not patch these requests since when reopening Chrome, |
| 156 # identifier will be different. | 157 # there is no way the entry can be reused since the upload data stream's |
| 157 # | 158 # identifier will be different. |
| 158 # The fact that these entries are kept in the cache after closing Chrome | 159 # |
| 159 # properly by closing the Chrome tab as the ChromeControler.SetSlowDeath() | 160 # The fact that these entries are kept in the cache after closing Chrome |
| 160 # do is known chrome bug (crbug.com/610725). | 161 # properly by closing the Chrome tab as the |
| 161 if request.url not in cache_entries: | 162 # ChromeControler.SetSlowDeath() does is a known Chrome bug |
| 162 continue | 163 # (crbug.com/610725). |
| 163 # Chrome prunes Set-Cookie from response headers before storing them in | 164 if request.url not in cache_entries: |
| 164 # disk cache. Also, it adds implicit "Vary: cookie" header to all redirect | 165 continue |
| 165 # response headers. Sandwich manages the cache, but between recording the | 166 # Chrome prunes Set-Cookie from response headers before storing them in |
| 166 # cache and benchmarking the cookie jar is invalidated. This leads to | 167 # disk cache. Also, it adds implicit "Vary: cookie" header to all |
| 167 # invalidation of all cacheable redirects. | 168 # redirect response headers. Sandwich manages the cache, but between |
| 168 raw_headers = request.GetRawResponseHeaders() | 169 # recording the cache and benchmarking the cookie jar is invalidated. |
| 169 cache_backend.UpdateRawResponseHeaders(request.url, raw_headers) | 170 # This leads to invalidation of all cacheable redirects. |
| 170 # NoState-Prefetch would only fetch the resources, but not parse them. | 171 raw_headers = request.GetRawResponseHeaders() |
| 171 cache_backend.DeleteStreamForKey(request.url, 2) | 172 try: |
| 173 cache_backend.UpdateRawResponseHeaders(request.url, raw_headers) |
| 174 # NoState-Prefetch would only fetch the resources, but not parse them. |
| 175 cache_backend.DeleteStreamForKey(request.url, 2) |
| 176 # Sync operations to actually catch errors here. |
| 177 cache_backend.Sync() |
| 178 except chrome_cache.CacheBackendError as error: |
| 179 # For some reason, sometimes the cachetool can't find key's entry when |
| 180 # not using the online mode. |
| 181 logging.warning('cachetool error: %s', repr(error)) |
| 182 logging.info('Patched cache size: %d bytes' % cache_backend.GetSize()) |
| 172 chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path) | 183 chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path) |
| 173 logging.info('Patched cache size: %d bytes' % cache_backend.GetSize()) | |
| 174 | 184 |
| 175 | 185 |
| 176 def _DiscoverRequests(dependencies_lens, subresource_discoverer): | 186 def _DiscoverRequests(dependencies_lens, subresource_discoverer): |
| 177 trace = dependencies_lens.loading_trace | 187 trace = dependencies_lens.loading_trace |
| 178 first_resource_request = trace.request_track.GetFirstResourceRequest() | 188 first_resource_request = trace.request_track.GetFirstResourceRequest() |
| 179 | 189 |
| 180 if subresource_discoverer == Discoverer.EmptyCache: | 190 if subresource_discoverer == Discoverer.EmptyCache: |
| 181 requests = [] | 191 requests = [] |
| 182 elif subresource_discoverer == Discoverer.FullCache: | 192 elif subresource_discoverer == Discoverer.FullCache: |
| 183 requests = dependencies_lens.loading_trace.request_track.GetEvents() | 193 requests = dependencies_lens.loading_trace.request_track.GetEvents() |
| (...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 396 {URL of all requests: encoded_data_length}, | 406 {URL of all requests: encoded_data_length}, |
| 397 'effective_post_requests': [URLs of POST requests], | 407 'effective_post_requests': [URLs of POST requests], |
| 398 'expected_cached_resources': [URLs of resources expected to be cached], | 408 'expected_cached_resources': [URLs of resources expected to be cached], |
| 399 'successfully_cached': [URLs of cached sub-resources] | 409 'successfully_cached': [URLs of cached sub-resources] |
| 400 } | 410 } |
| 401 """ | 411 """ |
| 402 # TODO(gabadie): What's the best way of propagating errors happening in here? | 412 # TODO(gabadie): What's the best way of propagating errors happening in here? |
| 403 logging.info('lists cached urls from %s' % cache_archive_path) | 413 logging.info('lists cached urls from %s' % cache_archive_path) |
| 404 with common_util.TemporaryDirectory() as cache_directory: | 414 with common_util.TemporaryDirectory() as cache_directory: |
| 405 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 415 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| 406 cache_keys = set( | 416 cache_keys = set(chrome_cache.CacheBackend( |
| 407 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) | 417 cache_directory, chrome_cache.CacheBackendType.SIMPLE).ListKeys()) |
| 408 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 418 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
| 409 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 419 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
| 410 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 420 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
| 411 effective_encoded_data_lengths = {} | 421 effective_encoded_data_lengths = {} |
| 412 for request in _FilterOutDataAndIncompleteRequests( | 422 for request in _FilterOutDataAndIncompleteRequests( |
| 413 trace.request_track.GetEvents()): | 423 trace.request_track.GetEvents()): |
| 414 if request.from_disk_cache or request.served_from_cache: | 424 if request.from_disk_cache or request.served_from_cache: |
| 415 # At cache archive creation time, a request might be loaded several times, | 425 # At cache archive creation time, a request might be loaded several times, |
| 416 # but avoid the request.encoded_data_length == 0 if loaded from cache. | 426 # but avoid the request.encoded_data_length == 0 if loaded from cache. |
| 417 continue | 427 continue |
| (...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 698 run_metrics_list = _ProcessRunOutputDir( | 708 run_metrics_list = _ProcessRunOutputDir( |
| 699 cache_validation_result, benchmark_setup, RunBenchmark.path) | 709 cache_validation_result, benchmark_setup, RunBenchmark.path) |
| 700 with open(ProcessRunOutputDir.path, 'w') as csv_file: | 710 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
| 701 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 711 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
| 702 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 712 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
| 703 writer.writeheader() | 713 writer.writeheader() |
| 704 for trace_metrics in run_metrics_list: | 714 for trace_metrics in run_metrics_list: |
| 705 writer.writerow(trace_metrics) | 715 writer.writerow(trace_metrics) |
| 706 | 716 |
| 707 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) | 717 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
| OLD | NEW |