Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_prefetch.py

Issue 2112013003: sandwich: Use cachetool's batch mode to speed-up cache processing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@af00
Patch Set: Created 4 years, 5 months ago
@@ -1,10 +1,10 @@
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
 """
 Implements a task builder for benchmarking effects of NoState Prefetch.
 Noticeable steps of the task pipeline:
 * Save a WPR archive
 * Process the WPR archive to make all resources cacheable
 * Process cache archive to patch response headers back to their original
(...skipping 98 matching lines...)
@@ -109,17 +109,17 @@
 
 
 def _FilterOutDataAndIncompleteRequests(requests):
   for request in filter(lambda r: not r.IsDataRequest(), requests):
     # The protocol is only known once the response has been received, but the
     # trace recording might have been stopped while some JavaScript-originated
     # requests had not yet received any response.
     if request.protocol is None:
       assert not request.HasReceivedResponse()
       continue
-    if request.protocol == 'about':
+    if request.protocol in {'about', 'blob'}:
       continue
     if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}:
       raise RuntimeError('Unknown request protocol {}'.format(request.protocol))
     yield request
 
 
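To make the filter's contract concrete, here is a minimal sketch of its behavior, including the new blob: case. The stub request class is invented for illustration and models only the attributes the generator reads:

    class _StubRequest(object):
      """Hypothetical stand-in for a trace request event (illustration only)."""

      def __init__(self, protocol, is_data=False, has_response=True):
        self.protocol = protocol
        self._is_data = is_data
        self._has_response = has_response

      def IsDataRequest(self):
        return self._is_data

      def HasReceivedResponse(self):
        return self._has_response


    requests = [
        _StubRequest('http/1.1'),                # kept
        _StubRequest(None, is_data=True),        # dropped: data request
        _StubRequest(None, has_response=False),  # dropped: response never arrived
        _StubRequest('about'),                   # dropped: about: scheme
        _StubRequest('blob'),                    # dropped: blob: scheme (new in this CL)
    ]
    assert [r.protocol for r in _FilterOutDataAndIncompleteRequests(requests)] \
        == ['http/1.1']
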
@@ -126,50 +126,60 @@
 def _PatchCacheArchive(cache_archive_path, loading_trace_path,
                        cache_archive_dest_path):
   """Patch the cache archive.
 
   Note: This method updates the raw response headers of cache entries to store
     the ones such as Set-Cookie that were pruned by the
     net::HttpCacheTransaction, and removes stream index 2, which holds the
     resource's compile meta data.
 
   Args:
     cache_archive_path: Input archive's path to patch.
     loading_trace_path: Path of the loading trace that recorded the cache
       archive <cache_archive_path>.
     cache_archive_dest_path: Archive destination's path.
   """
+  logging.info('loading trace: %s', loading_trace_path)
   trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
     cache_path = os.path.join(tmp_path, 'cache')
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
-    cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
-    cache_entries = set(cache_backend.ListKeys())
-    logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
-    for request in _FilterOutDataAndIncompleteRequests(
-        trace.request_track.GetEvents()):
-      # On requests having an upload data stream, such as POST requests,
-      # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
-      # the upload data stream's session unique identifier.
-      #
-      # It is fine not to patch these requests: when reopening Chrome, the
-      # entry cannot be reused because the upload data stream's identifier
-      # will be different.
-      #
-      # That these entries are kept in the cache after closing Chrome properly
-      # (by closing the Chrome tab, as ChromeControler.SetSlowDeath() does) is
-      # a known Chrome bug (crbug.com/610725).
-      if request.url not in cache_entries:
-        continue
-      # Chrome prunes Set-Cookie from response headers before storing them in
-      # disk cache. Also, it adds an implicit "Vary: cookie" header to all
-      # redirect response headers. Sandwich manages the cache, but between
-      # recording the cache and benchmarking, the cookie jar is invalidated,
-      # which invalidates all cacheable redirects.
-      raw_headers = request.GetRawResponseHeaders()
-      cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
-      # NoState-Prefetch would only fetch the resources, but not parse them.
-      cache_backend.DeleteStreamForKey(request.url, 2)
-    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
-    logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
+    with chrome_cache.OnlineCacheBackend(
+        cache_path, chrome_cache.CacheBackendType.Simple) as cache_backend:
+      cache_entries = set(cache_backend.ListKeys())
+      logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
+      for request in _FilterOutDataAndIncompleteRequests(
+          trace.request_track.GetEvents()):
+        # On requests having an upload data stream, such as POST requests,
+        # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key
+        # with the upload data stream's session unique identifier.
+        #
+        # It is fine not to patch these requests: when reopening Chrome, the
+        # entry cannot be reused because the upload data stream's identifier
+        # will be different.
+        #
+        # That these entries are kept in the cache after closing Chrome
+        # properly (by closing the Chrome tab, as
+        # ChromeControler.SetSlowDeath() does) is a known Chrome bug
+        # (crbug.com/610725).
+        if request.url not in cache_entries:
+          continue
+        # Chrome prunes Set-Cookie from response headers before storing them
+        # in disk cache. Also, it adds an implicit "Vary: cookie" header to
+        # all redirect response headers. Sandwich manages the cache, but
+        # between recording the cache and benchmarking, the cookie jar is
+        # invalidated, which invalidates all cacheable redirects.
+        raw_headers = request.GetRawResponseHeaders()
+        try:
+          cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
+          # NoState-Prefetch would only fetch the resources, but not parse
+          # them.
+          cache_backend.DeleteStreamForKey(request.url, 2)
+          # Sync operations to actually catch errors here.
+          cache_backend.Sync()
+        except chrome_cache.CacheBackendError as error:
+          # For some reason, sometimes cachetool can't find the key's entry
+          # when not using the online mode.
+          logging.warning('cachetool error: %s', repr(error))
+      logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
+    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
 
 
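The try/except added here exists because a batched backend only reports failures once its queued commands are flushed, hence the explicit Sync() inside the same try block as the mutations. A rough sketch of that failure model follows; the class and its internals are invented for illustration and are not cachetool's or chrome_cache's real interface:

    class CacheBackendError(Exception):
      """Stands in for chrome_cache.CacheBackendError in this sketch."""


    class _BatchedBackendSketch(object):
      """Illustration only: mutations are queued, errors surface on Sync()."""

      def __init__(self, entry_keys):
        self._entry_keys = set(entry_keys)
        self._pending = []  # Queued (operation name, key) pairs.

      def UpdateRawResponseHeaders(self, key, raw_headers):
        self._pending.append(('update_headers', key))  # No error raised yet.

      def DeleteStreamForKey(self, key, index):
        self._pending.append(('delete_stream', key))  # No error raised yet.

      def Sync(self):
        # The queued operations only run here, so a missing entry raises at
        # Sync() time rather than at the call sites above. Wrapping the whole
        # sequence in one try block therefore catches errors from all steps.
        while self._pending:
          operation, key = self._pending.pop(0)
          if key not in self._entry_keys:
            raise CacheBackendError('%s: no entry for %s' % (operation, key))
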
@@ -176,8 +186,8 @@
 def _DiscoverRequests(dependencies_lens, subresource_discoverer):
   trace = dependencies_lens.loading_trace
   first_resource_request = trace.request_track.GetFirstResourceRequest()
 
   if subresource_discoverer == Discoverer.EmptyCache:
     requests = []
   elif subresource_discoverer == Discoverer.FullCache:
     requests = dependencies_lens.loading_trace.request_track.GetEvents()
(...skipping 212 matching lines...)
@@ -396,22 +406,22 @@
         {URL of all requests: encoded_data_length},
       'effective_post_requests': [URLs of POST requests],
       'expected_cached_resources': [URLs of resources expected to be cached],
       'successfully_cached': [URLs of cached sub-resources]
     }
   """
   # TODO(gabadie): What's the best way of propagating errors happening in here?
   logging.info('lists cached urls from %s' % cache_archive_path)
   with common_util.TemporaryDirectory() as cache_directory:
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
-    cache_keys = set(
-        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
+    cache_keys = set(chrome_cache.CacheBackend(
+        cache_directory, chrome_cache.CacheBackendType.Simple).ListKeys())
     trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
     effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
     effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
     effective_encoded_data_lengths = {}
     for request in _FilterOutDataAndIncompleteRequests(
         trace.request_track.GetEvents()):
       if request.from_disk_cache or request.served_from_cache:
         # At cache archive creation time, a request might be loaded several
         # times; skip it here to avoid request.encoded_data_length == 0 when
         # it was served from the cache.
         continue
(...skipping 280 matching lines...)
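For illustration, a hypothetical value of the dict documented in the docstring above. The URLs and byte counts are invented, and the name of the first key is inferred from the effective_encoded_data_lengths variable in the code, since the docstring line naming it falls in the skipped region:

    cache_validation_result = {
        # Inferred key name; maps every request URL to its encoded data length.
        'effective_encoded_data_lengths': {
            'http://example.com/': 1204,
            'http://example.com/app.js': 5130,
        },
        'effective_post_requests': ['http://example.com/api/log'],
        'expected_cached_resources': [
            'http://example.com/',
            'http://example.com/app.js',
        ],
        'successfully_cached': ['http://example.com/app.js'],
    }
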
@@ -698,10 +708,10 @@
       run_metrics_list = _ProcessRunOutputDir(
           cache_validation_result, benchmark_setup, RunBenchmark.path)
       with open(ProcessRunOutputDir.path, 'w') as csv_file:
         writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
             sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
         for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
 
     self._common_builder.default_final_tasks.append(ProcessRunOutputDir)
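The CSV emission above is plain csv.DictWriter usage: per-benchmark columns are prepended to the common metric columns, a header row is written, then one row per trace. A self-contained sketch of the same pattern, with column names and row values invented for the example:

    import csv

    additional_column_names = ['url', 'repeat_id']            # invented
    common_csv_column_names = ['total_load_ms', 'onload_ms']  # invented

    run_metrics_list = [
        {'url': 'http://example.com/', 'repeat_id': 0,
         'total_load_ms': 1320, 'onload_ms': 860},
    ]

    with open('run_metrics.csv', 'w') as csv_file:
      writer = csv.DictWriter(
          csv_file,
          fieldnames=(additional_column_names + common_csv_column_names))
      writer.writeheader()  # Emits: url,repeat_id,total_load_ms,onload_ms
      for trace_metrics in run_metrics_list:
        writer.writerow(trace_metrics)
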
