| OLD | NEW | 
|    1 # Copyright 2016 The Chromium Authors. All rights reserved. |    1 # Copyright 2016 The Chromium Authors. All rights reserved. | 
|    2 # Use of this source code is governed by a BSD-style license that can be |    2 # Use of this source code is governed by a BSD-style license that can be | 
|    3 # found in the LICENSE file. |    3 # found in the LICENSE file. | 
|    4  |    4  | 
|    5 """ |    5 """ | 
|    6 Implements a task builder for benchmarking effects of NoState Prefetch. |    6 Implements a task builder for benchmarking effects of NoState Prefetch. | 
|    7 Noticeable steps of the task pipeline: |    7 Noticeable steps of the task pipeline: | 
|    8   * Save a WPR archive |    8   * Save a WPR archive | 
|    9   * Process the WPR archive to make all resources cacheable |    9   * Process the WPR archive to make all resources cacheable | 
|   10   * Process cache archive to patch response headers back to their original |   10   * Process cache archive to patch response headers back to their original | 
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  109  |  109  | 
|  110  |  110  | 
def _FilterOutDataAndIncompleteRequests(requests):
  """Yields the requests that completed over a supported HTTP protocol.

  Data requests, requests whose protocol is still unknown, and requests using
  the 'about' or 'blob' protocol are silently skipped.

  Args:
    requests: Iterable of request objects exposing IsDataRequest(),
        HasReceivedResponse() and a protocol attribute.

  Yields:
    The requests whose protocol is 'http/0.9', 'http/1.0' or 'http/1.1'.

  Raises:
    RuntimeError: If a request uses any other protocol.
  """
  for candidate in requests:
    if candidate.IsDataRequest():
      continue
    protocol = candidate.protocol
    if protocol is None:
      # A protocol only becomes known when the response arrives. Recording of
      # the trace may have stopped while some JavaScript-originated requests
      # were still waiting for their response.
      assert not candidate.HasReceivedResponse()
      continue
    if protocol in {'about', 'blob'}:
      continue
    if protocol in {'http/0.9', 'http/1.0', 'http/1.1'}:
      yield candidate
    else:
      raise RuntimeError('Unknown request protocol {}'.format(protocol))
|  124  |  124  | 
|  125  |  125  | 
def _PatchCacheArchive(cache_archive_path, loading_trace_path,
                       cache_archive_dest_path):
  """Patches the cache archive.

  Note: This function updates the raw response headers of the cache entries to
    restore the ones, such as Set-Cookie, that were pruned by
    net::HttpCacheTransaction, and removes the stream index 2 holding the
    resource's compile meta data.

  Args:
    cache_archive_path: Input archive's path to patch.
    loading_trace_path: Path of the loading trace that was recorded while
        building the cache archive <cache_archive_path>.
    cache_archive_dest_path: Archive destination's path.
  """
  logging.info('loading trace: %s', loading_trace_path)
  trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
    cache_path = os.path.join(tmp_path, 'cache')
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
    with chrome_cache.OnlineCacheBackend(
        cache_path, chrome_cache.CacheBackendType.Simple) as cache_backend:
      cache_entries = set(cache_backend.ListKeys())
      logging.info('Original cache size: %d bytes', cache_backend.GetSize())
      for request in _FilterOutDataAndIncompleteRequests(
          trace.request_track.GetEvents()):
        # On requests having an upload data stream such as POST requests,
        # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
        # the upload data stream's session unique identifier.
        #
        # It is fine to not patch these requests since when reopening Chrome,
        # there is no way the entry can be reused since the upload data stream's
        # identifier will be different.
        #
        # The fact that these entries are kept in the cache after closing Chrome
        # properly by closing the Chrome tab, as
        # ChromeControler.SetSlowDeath() does, is a known Chrome bug
        # (crbug.com/610725).
        if request.url not in cache_entries:
          continue
        # Chrome prunes Set-Cookie from response headers before storing them in
        # disk cache. Also, it adds an implicit "Vary: cookie" header to all
        # redirect response headers. Sandwich manages the cache, but between
        # recording the cache and benchmarking, the cookie jar is invalidated.
        # This leads to invalidation of all cacheable redirects.
        raw_headers = request.GetRawResponseHeaders()
        try:
          cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
          # NoState-Prefetch would only fetch the resources, but not parse them.
          cache_backend.DeleteStreamForKey(request.url, 2)
          # Sync operations to actually catch errors here.
          cache_backend.Sync()
        except chrome_cache.CacheBackendError as error:
          # For some reason, sometimes the cachetool can't find the key's entry
          # when not using the online mode. Patching is best effort: log the
          # failure and move on to the next request.
          logging.warning('cachetool error: %r', error)
      logging.info('Patched cache size: %d bytes', cache_backend.GetSize())
    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
|  174  |  184  | 
|  175  |  185  | 
|  176 def _DiscoverRequests(dependencies_lens, subresource_discoverer): |  186 def _DiscoverRequests(dependencies_lens, subresource_discoverer): | 
|  177   trace = dependencies_lens.loading_trace |  187   trace = dependencies_lens.loading_trace | 
|  178   first_resource_request = trace.request_track.GetFirstResourceRequest() |  188   first_resource_request = trace.request_track.GetFirstResourceRequest() | 
|  179  |  189  | 
|  180   if subresource_discoverer == Discoverer.EmptyCache: |  190   if subresource_discoverer == Discoverer.EmptyCache: | 
|  181     requests = [] |  191     requests = [] | 
|  182   elif subresource_discoverer == Discoverer.FullCache: |  192   elif subresource_discoverer == Discoverer.FullCache: | 
|  183     requests = dependencies_lens.loading_trace.request_track.GetEvents() |  193     requests = dependencies_lens.loading_trace.request_track.GetEvents() | 
| (...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  396         {URL of all requests: encoded_data_length}, |  406         {URL of all requests: encoded_data_length}, | 
|  397       'effective_post_requests': [URLs of POST requests], |  407       'effective_post_requests': [URLs of POST requests], | 
|  398       'expected_cached_resources': [URLs of resources expected to be cached], |  408       'expected_cached_resources': [URLs of resources expected to be cached], | 
|  399       'successfully_cached': [URLs of cached sub-resources] |  409       'successfully_cached': [URLs of cached sub-resources] | 
|  400     } |  410     } | 
|  401   """ |  411   """ | 
|  402   # TODO(gabadie): What's the best way of propagating errors happening in here? |  412   # TODO(gabadie): What's the best way of propagating errors happening in here? | 
|  403   logging.info('lists cached urls from %s' % cache_archive_path) |  413   logging.info('lists cached urls from %s' % cache_archive_path) | 
|  404   with common_util.TemporaryDirectory() as cache_directory: |  414   with common_util.TemporaryDirectory() as cache_directory: | 
|  405     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |  415     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 
|  406     cache_keys = set( |  416     cache_keys = set(chrome_cache.CacheBackend( | 
|  407         chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) |  417         cache_directory, chrome_cache.CacheBackendType.Simple).ListKeys()) | 
|  408   trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |  418   trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 
|  409   effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |  419   effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 
|  410   effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |  420   effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 
|  411   effective_encoded_data_lengths = {} |  421   effective_encoded_data_lengths = {} | 
|  412   for request in _FilterOutDataAndIncompleteRequests( |  422   for request in _FilterOutDataAndIncompleteRequests( | 
|  413       trace.request_track.GetEvents()): |  423       trace.request_track.GetEvents()): | 
|  414     if request.from_disk_cache or request.served_from_cache: |  424     if request.from_disk_cache or request.served_from_cache: | 
|  415       # At cache archive creation time, a request might be loaded several times, |  425       # At cache archive creation time, a request might be loaded several times, | 
|  416       # but avoid the request.encoded_data_length == 0 if loaded from cache. |  426       # but avoid the request.encoded_data_length == 0 if loaded from cache. | 
|  417       continue |  427       continue | 
| (...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  698       run_metrics_list = _ProcessRunOutputDir( |  708       run_metrics_list = _ProcessRunOutputDir( | 
|  699           cache_validation_result, benchmark_setup, RunBenchmark.path) |  709           cache_validation_result, benchmark_setup, RunBenchmark.path) | 
|  700       with open(ProcessRunOutputDir.path, 'w') as csv_file: |  710       with open(ProcessRunOutputDir.path, 'w') as csv_file: | 
|  701         writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |  711         writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 
|  702                                     sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |  712                                     sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 
|  703         writer.writeheader() |  713         writer.writeheader() | 
|  704         for trace_metrics in run_metrics_list: |  714         for trace_metrics in run_metrics_list: | 
|  705           writer.writerow(trace_metrics) |  715           writer.writerow(trace_metrics) | 
|  706  |  716  | 
|  707     self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |  717     self._common_builder.default_final_tasks.append(ProcessRunOutputDir) | 
| OLD | NEW |