Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_prefetch.py

Issue 2112013003: sandwich: Use cachetool's batch mode to speed up cache processing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@af00
Patch Set: s/Online/Batch Created 4 years, 5 months ago
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
 """
 Implements a task builder for benchmarking effects of NoState Prefetch.
 Noticeable steps of the task pipeline:
 * Save a WPR archive
 * Process the WPR archive to make all resources cacheable
 * Process cache archive to patch response headers back to their original
(...skipping 98 matching lines...)
 
 
 def _FilterOutDataAndIncompleteRequests(requests):
   for request in filter(lambda r: not r.IsDataRequest(), requests):
     # The protocol is only known once the response has been received, but the
     # trace recording might have been stopped while some JavaScript-originated
     # requests had not yet received a response.
     if request.protocol is None:
       assert not request.HasReceivedResponse()
       continue
-    if request.protocol == 'about':
+    if request.protocol in {'about', 'blob'}:
       continue
     if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}:
       raise RuntimeError('Unknown request protocol {}'.format(request.protocol))
     yield request
 
 
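The change above widens the skip set from just about: to about: and blob: scheme requests, so blob URLs fall through to the continue instead of hitting the RuntimeError. A minimal sketch of the helper's behavior, using a hypothetical stub in place of the real request_track objects:

    # _StubRequest is an invented stand-in exposing only the attributes that
    # _FilterOutDataAndIncompleteRequests touches.
    class _StubRequest(object):
      def __init__(self, protocol, has_response=True, is_data=False):
        self.protocol = protocol
        self._has_response = has_response
        self._is_data = is_data

      def IsDataRequest(self):
        return self._is_data

      def HasReceivedResponse(self):
        return self._has_response

    requests = [
        _StubRequest('http/1.1'),                # yielded
        _StubRequest('blob'),                    # skipped (new in this patch)
        _StubRequest(None, has_response=False),  # skipped: no response yet
        _StubRequest('data', is_data=True),      # filtered out up front
    ]
    kept = list(_FilterOutDataAndIncompleteRequests(requests))
    assert [r.protocol for r in kept] == ['http/1.1']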
 def _PatchCacheArchive(cache_archive_path, loading_trace_path,
                        cache_archive_dest_path):
   """Patch the cache archive.
 
   Note: This method updates the raw response headers of cache entries to
   restore headers such as Set-Cookie that were pruned by
   net::HttpCacheTransaction, and removes stream index 2, which holds the
   resource's compile meta data.
 
   Args:
     cache_archive_path: Input archive's path to patch.
     loading_trace_path: Path of the loading trace that recorded the cache
       archive <cache_archive_path>.
     cache_archive_dest_path: Archive destination's path.
   """
+  logging.info('loading trace: %s', loading_trace_path)
   trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
     cache_path = os.path.join(tmp_path, 'cache')
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
-    cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
+    cache_backend = chrome_cache.BatchCacheBackend(
+        cache_path, chrome_cache.CacheBackendType.SIMPLE)
     cache_entries = set(cache_backend.ListKeys())
     logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
     for request in _FilterOutDataAndIncompleteRequests(
         trace.request_track.GetEvents()):
       # On requests having an upload data stream, such as POST requests,
       # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
       # the upload data stream's session-unique identifier.
       #
       # It is fine not to patch these requests: when Chrome is reopened, the
       # entry cannot be reused anyway, since the upload data stream's
       # identifier will be different.
       #
       # The fact that these entries are kept in the cache even after closing
       # Chrome properly (by closing the Chrome tab, as
       # ChromeControler.SetSlowDeath() does) is a known Chrome bug
       # (crbug.com/610725).
       if request.url not in cache_entries:
         continue
       # Chrome prunes Set-Cookie from response headers before storing them in
       # the disk cache. It also adds an implicit "Vary: cookie" header to all
       # redirect response headers. Sandwich manages the cache, but between
       # recording the cache and benchmarking, the cookie jar is invalidated,
       # which leads to invalidation of all cacheable redirects.
       raw_headers = request.GetRawResponseHeaders()
       cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
       # NoState-Prefetch only fetches the resources; it does not parse them.
       cache_backend.DeleteStreamForKey(request.url, 2)
+    cache_backend.ProcessBatch()
+    logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
     chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
-    logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
 
 
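This hunk is the heart of the CL: each UpdateRawResponseHeaders() and DeleteStreamForKey() call previously went through chrome_cache.CacheBackend, presumably paying for a separate cachetool invocation per operation, while BatchCacheBackend queues the mutations and applies them all in the single ProcessBatch() flush before the cache directory is re-zipped. A hedged sketch of that queue-then-flush shape (the class and its internals below are illustrative assumptions, not the real chrome_cache API):

    class _QueueThenFlushSketch(object):
      """Illustration of the batching pattern only; not the real backend."""

      def __init__(self, cache_path):
        self._cache_path = cache_path
        self._pending = []  # queued (verb, key, payload) tuples

      def UpdateRawResponseHeaders(self, key, raw_headers):
        # Cheap append; nothing touches the cache yet.
        self._pending.append(('update_raw_headers', key, raw_headers))

      def DeleteStreamForKey(self, key, stream_index):
        self._pending.append(('delete_stream', key, stream_index))

      def ProcessBatch(self):
        # One pass (one external-tool invocation in the real backend) for all
        # queued mutations, instead of one subprocess launch per mutation.
        pending, self._pending = self._pending, []
        for verb, key, payload in pending:
          self._ApplyOne(verb, key, payload)

      def _ApplyOne(self, verb, key, payload):
        pass  # stand-in for the actual cache mutation

Note also that the 'Patched cache size' log moves before ZipDirectoryContent() in the new ordering, so the reported size reflects the cache after the queued mutations have actually been applied.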
 def _DiscoverRequests(dependencies_lens, subresource_discoverer):
   trace = dependencies_lens.loading_trace
   first_resource_request = trace.request_track.GetFirstResourceRequest()
 
   if subresource_discoverer == Discoverer.EmptyCache:
     requests = []
   elif subresource_discoverer == Discoverer.FullCache:
     requests = dependencies_lens.loading_trace.request_track.GetEvents()
(...skipping 212 matching lines...)
         {URL of all requests: encoded_data_length},
       'effective_post_requests': [URLs of POST requests],
       'expected_cached_resources': [URLs of resources expected to be cached],
       'successfully_cached': [URLs of cached sub-resources]
     }
   """
   # TODO(gabadie): What's the best way of propagating errors happening in here?
   logging.info('lists cached urls from %s' % cache_archive_path)
   with common_util.TemporaryDirectory() as cache_directory:
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
-    cache_keys = set(
-        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
+    cache_keys = set(chrome_cache.CacheBackend(
+        cache_directory, chrome_cache.CacheBackendType.SIMPLE).ListKeys())
   trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
   effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
   effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
   effective_encoded_data_lengths = {}
   for request in _FilterOutDataAndIncompleteRequests(
       trace.request_track.GetEvents()):
     if request.from_disk_cache or request.served_from_cache:
       # At cache archive creation time, a request might be loaded several
       # times; skip those served from the cache, since for them
       # request.encoded_data_length == 0.
       continue
(...skipping 280 matching lines...)
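The cache_keys set computed above is what the (elided) remainder of the function compares against the trace to populate the return keys documented in the docstring. A hedged reconstruction of that comparison's shape, using only names taken from the docstring and this hunk (the actual elided code may differ):

    # Illustrative only: POST requests are excluded because their cache keys
    # carry a session-unique upload-stream prefix (see _PatchCacheArchive's
    # comments), so they can never be matched on a later browse.
    expected_cached_resources = set(effective_requests) - set(
        effective_post_requests)
    successfully_cached = expected_cached_resources & cache_keys
    for url in sorted(expected_cached_resources - successfully_cached):
      logging.warning('resource not found in cache archive: %s', url)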
       run_metrics_list = _ProcessRunOutputDir(
           cache_validation_result, benchmark_setup, RunBenchmark.path)
       with open(ProcessRunOutputDir.path, 'w') as csv_file:
         writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
             sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
         for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
 
     self._common_builder.default_final_tasks.append(ProcessRunOutputDir)
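The ProcessRunOutputDir task above flattens each run's metrics into one CSV row per traced page load. A self-contained sketch of the same csv.DictWriter pattern (the column names and values below are invented placeholders; the real ones come from additional_column_names plus sandwich_metrics.COMMON_CSV_COLUMN_NAMES):

    import csv

    fieldnames = ['repeat_id', 'url', 'total_load_ms']  # placeholder columns
    run_metrics_list = [
        {'repeat_id': 0, 'url': 'https://example.com', 'total_load_ms': 1234},
        {'repeat_id': 1, 'url': 'https://example.com', 'total_load_ms': 1180},
    ]
    with open('run_output.csv', 'w') as csv_file:
      writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
      writer.writeheader()                # single header row
      for trace_metrics in run_metrics_list:
        writer.writerow(trace_metrics)    # one row per benchmark run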
