Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_prefetch.py

Issue 2112013003: sandwich: Use cachetool's batch mode to speed up cache processing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@af00
Patch Set: s/Online/Batch Created 4 years, 5 months ago
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
 """
 Implements a task builder for benchmarking effects of NoState Prefetch.
 Noticeable steps of the task pipeline:
 * Save a WPR archive
 * Process the WPR archive to make all resources cacheable
 * Process cache archive to patch response headers back to their original
(...skipping 98 matching lines...)
 
 
 def _FilterOutDataAndIncompleteRequests(requests):
   for request in filter(lambda r: not r.IsDataRequest(), requests):
     # The protocol is only known once the response has been received, but the
     # trace recording might have been stopped while some JavaScript-originated
     # requests had not yet received a response.
     if request.protocol is None:
       assert not request.HasReceivedResponse()
       continue
-    if request.protocol == 'about':
+    if request.protocol in {'about', 'blob'}:
       continue
     if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}:
       raise RuntimeError('Unknown request protocol {}'.format(request.protocol))
     yield request
 
 
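The change above widens the skip set from just about: to about: and blob: scheme requests, so blob URLs fall through to the continue instead of hitting the RuntimeError. A minimal sketch of the helper's behavior, using a hypothetical stub in place of the real request_track objects:

    # _StubRequest is an invented stand-in exposing only the attributes that
    # _FilterOutDataAndIncompleteRequests touches.
    class _StubRequest(object):
      def __init__(self, protocol, has_response=True, is_data=False):
        self.protocol = protocol
        self._has_response = has_response
        self._is_data = is_data

      def IsDataRequest(self):
        return self._is_data

      def HasReceivedResponse(self):
        return self._has_response

    requests = [
        _StubRequest('http/1.1'),                # yielded
        _StubRequest('blob'),                    # skipped (new in this patch)
        _StubRequest(None, has_response=False),  # skipped: no response yet
        _StubRequest('data', is_data=True),      # filtered out up front
    ]
    kept = list(_FilterOutDataAndIncompleteRequests(requests))
    assert [r.protocol for r in kept] == ['http/1.1']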
 def _PatchCacheArchive(cache_archive_path, loading_trace_path,
                        cache_archive_dest_path):
   """Patch the cache archive.
 
   Note: This method updates the raw response headers of cache entries to
   restore headers such as Set-Cookie that were pruned by
   net::HttpCacheTransaction, and removes stream index 2, which holds the
   resource's compile meta data.
 
   Args:
     cache_archive_path: Input archive's path to patch.
     loading_trace_path: Path of the loading trace that recorded the cache
       archive <cache_archive_path>.
     cache_archive_dest_path: Archive destination's path.
   """
+  logging.info('loading trace: %s', loading_trace_path)
   trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
     cache_path = os.path.join(tmp_path, 'cache')
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
-    cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
+    cache_backend = chrome_cache.BatchCacheBackend(
+        cache_path, chrome_cache.CacheBackendType.SIMPLE)
     cache_entries = set(cache_backend.ListKeys())
     logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
     for request in _FilterOutDataAndIncompleteRequests(
         trace.request_track.GetEvents()):
       # On requests having an upload data stream, such as POST requests,
       # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
       # the upload data stream's session-unique identifier.
       #
       # It is fine not to patch these requests: when Chrome is reopened, the
       # entry cannot be reused anyway, since the upload data stream's
       # identifier will be different.
       #
       # The fact that these entries are kept in the cache even after closing
       # Chrome properly (by closing the Chrome tab, as
       # ChromeControler.SetSlowDeath() does) is a known Chrome bug
       # (crbug.com/610725).
       if request.url not in cache_entries:
         continue
       # Chrome prunes Set-Cookie from response headers before storing them in
       # the disk cache. It also adds an implicit "Vary: cookie" header to all
       # redirect response headers. Sandwich manages the cache, but between
       # recording the cache and benchmarking, the cookie jar is invalidated,
       # which leads to invalidation of all cacheable redirects.
       raw_headers = request.GetRawResponseHeaders()
       cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
       # NoState-Prefetch only fetches the resources; it does not parse them.
       cache_backend.DeleteStreamForKey(request.url, 2)
+    cache_backend.ProcessBatch()
+    logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
     chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
-    logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
 
 
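This hunk is the heart of the CL: each UpdateRawResponseHeaders() and DeleteStreamForKey() call previously went through chrome_cache.CacheBackend, presumably paying for a separate cachetool invocation per operation, while BatchCacheBackend queues the mutations and applies them all in the single ProcessBatch() flush before the cache directory is re-zipped. A hedged sketch of that queue-then-flush shape (the class and its internals below are illustrative assumptions, not the real chrome_cache API):

    class _QueueThenFlushSketch(object):
      """Illustration of the batching pattern only; not the real backend."""

      def __init__(self, cache_path):
        self._cache_path = cache_path
        self._pending = []  # queued (verb, key, payload) tuples

      def UpdateRawResponseHeaders(self, key, raw_headers):
        # Cheap append; nothing touches the cache yet.
        self._pending.append(('update_raw_headers', key, raw_headers))

      def DeleteStreamForKey(self, key, stream_index):
        self._pending.append(('delete_stream', key, stream_index))

      def ProcessBatch(self):
        # One pass (one external-tool invocation in the real backend) for all
        # queued mutations, instead of one subprocess launch per mutation.
        pending, self._pending = self._pending, []
        for verb, key, payload in pending:
          self._ApplyOne(verb, key, payload)

      def _ApplyOne(self, verb, key, payload):
        pass  # stand-in for the actual cache mutation

Note also that the 'Patched cache size' log moves before ZipDirectoryContent() in the new ordering, so the reported size reflects the cache after the queued mutations have actually been applied.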
 def _DiscoverRequests(dependencies_lens, subresource_discoverer):
   trace = dependencies_lens.loading_trace
   first_resource_request = trace.request_track.GetFirstResourceRequest()
 
   if subresource_discoverer == Discoverer.EmptyCache:
     requests = []
   elif subresource_discoverer == Discoverer.FullCache:
     requests = dependencies_lens.loading_trace.request_track.GetEvents()
(...skipping 212 matching lines...)
         {URL of all requests: encoded_data_length},
       'effective_post_requests': [URLs of POST requests],
       'expected_cached_resources': [URLs of resources expected to be cached],
       'successfully_cached': [URLs of cached sub-resources]
     }
   """
   # TODO(gabadie): What's the best way of propagating errors happening in here?
   logging.info('lists cached urls from %s' % cache_archive_path)
   with common_util.TemporaryDirectory() as cache_directory:
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
-    cache_keys = set(
-        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
+    cache_keys = set(chrome_cache.CacheBackend(
+        cache_directory, chrome_cache.CacheBackendType.SIMPLE).ListKeys())
   trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
   effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
   effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
   effective_encoded_data_lengths = {}
   for request in _FilterOutDataAndIncompleteRequests(
       trace.request_track.GetEvents()):
     if request.from_disk_cache or request.served_from_cache:
       # At cache archive creation time, a request might be loaded several
       # times; skip those served from the cache, since for them
       # request.encoded_data_length == 0.
       continue
(...skipping 280 matching lines...)
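The cache_keys set computed above is what the (elided) remainder of the function compares against the trace to populate the return keys documented in the docstring. A hedged reconstruction of that comparison's shape, using only names taken from the docstring and this hunk (the actual elided code may differ):

    # Illustrative only: POST requests are excluded because their cache keys
    # carry a session-unique upload-stream prefix (see _PatchCacheArchive's
    # comments), so they can never be matched on a later browse.
    expected_cached_resources = set(effective_requests) - set(
        effective_post_requests)
    successfully_cached = expected_cached_resources & cache_keys
    for url in sorted(expected_cached_resources - successfully_cached):
      logging.warning('resource not found in cache archive: %s', url)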
       run_metrics_list = _ProcessRunOutputDir(
           cache_validation_result, benchmark_setup, RunBenchmark.path)
       with open(ProcessRunOutputDir.path, 'w') as csv_file:
         writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
             sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
         for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
 
     self._common_builder.default_final_tasks.append(ProcessRunOutputDir)
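The ProcessRunOutputDir task above flattens each run's metrics into one CSV row per traced page load. A self-contained sketch of the same csv.DictWriter pattern (the column names and values below are invented placeholders; the real ones come from additional_column_names plus sandwich_metrics.COMMON_CSV_COLUMN_NAMES):

    import csv

    fieldnames = ['repeat_id', 'url', 'total_load_ms']  # placeholder columns
    run_metrics_list = [
        {'repeat_id': 0, 'url': 'https://example.com', 'total_load_ms': 1234},
        {'repeat_id': 1, 'url': 'https://example.com', 'total_load_ms': 1180},
    ]
    with open('run_output.csv', 'w') as csv_file:
      writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
      writer.writeheader()                # single header row
      for trace_metrics in run_metrics_list:
        writer.writerow(trace_metrics)    # one row per benchmark run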
