Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_prefetch.py

Issue 2112013003: sandwich: Use cachetool's batch mode to speed-up cache processing. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@af00
Patch Set: Created 4 years, 5 months ago
@@ -1,10 +1,10 @@
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
 """
 Implements a task builder for benchmarking effects of NoState Prefetch.
 Noticeable steps of the task pipeline:
 * Save a WPR archive
 * Process the WPR archive to make all resources cacheable
 * Process cache archive to patch response headers back to their original
(...skipping 98 matching lines...)
@@ -109,17 +109,17 @@
 
 
 def _FilterOutDataAndIncompleteRequests(requests):
   for request in filter(lambda r: not r.IsDataRequest(), requests):
     # The protocol is only known once the response has been received, but the
     # trace recording might have been stopped while some JavaScript-originated
     # requests had not yet received any response.
     if request.protocol is None:
       assert not request.HasReceivedResponse()
       continue
-    if request.protocol == 'about':
+    if request.protocol in {'about', 'blob'}:
       continue
     if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}:
       raise RuntimeError('Unknown request protocol {}'.format(request.protocol))
     yield request
 
 
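To make the filter's contract concrete, here is a minimal sketch of its behavior, including the new blob: case. The stub request class is invented for illustration and models only the attributes the generator reads:

    class _StubRequest(object):
      """Hypothetical stand-in for a trace request event (illustration only)."""

      def __init__(self, protocol, is_data=False, has_response=True):
        self.protocol = protocol
        self._is_data = is_data
        self._has_response = has_response

      def IsDataRequest(self):
        return self._is_data

      def HasReceivedResponse(self):
        return self._has_response


    requests = [
        _StubRequest('http/1.1'),                # kept
        _StubRequest(None, is_data=True),        # dropped: data request
        _StubRequest(None, has_response=False),  # dropped: response never arrived
        _StubRequest('about'),                   # dropped: about: scheme
        _StubRequest('blob'),                    # dropped: blob: scheme (new in this CL)
    ]
    assert [r.protocol for r in _FilterOutDataAndIncompleteRequests(requests)] \
        == ['http/1.1']
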
@@ -126,50 +126,60 @@
 def _PatchCacheArchive(cache_archive_path, loading_trace_path,
                        cache_archive_dest_path):
   """Patch the cache archive.
 
   Note: This method updates the raw response headers of cache entries to store
     the ones such as Set-Cookie that were pruned by the
     net::HttpCacheTransaction, and removes stream index 2, which holds the
     resource's compile meta data.
 
   Args:
     cache_archive_path: Input archive's path to patch.
     loading_trace_path: Path of the loading trace that recorded the cache
       archive <cache_archive_path>.
     cache_archive_dest_path: Archive destination's path.
   """
+  logging.info('loading trace: %s', loading_trace_path)
   trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
     cache_path = os.path.join(tmp_path, 'cache')
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
-    cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
-    cache_entries = set(cache_backend.ListKeys())
-    logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
-    for request in _FilterOutDataAndIncompleteRequests(
-        trace.request_track.GetEvents()):
-      # On requests having an upload data stream, such as POST requests,
-      # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
-      # the upload data stream's session unique identifier.
-      #
-      # It is fine not to patch these requests: when reopening Chrome, the
-      # entry cannot be reused because the upload data stream's identifier
-      # will be different.
-      #
-      # That these entries are kept in the cache after closing Chrome properly
-      # (by closing the Chrome tab, as ChromeControler.SetSlowDeath() does) is
-      # a known Chrome bug (crbug.com/610725).
-      if request.url not in cache_entries:
-        continue
-      # Chrome prunes Set-Cookie from response headers before storing them in
-      # disk cache. Also, it adds an implicit "Vary: cookie" header to all
-      # redirect response headers. Sandwich manages the cache, but between
-      # recording the cache and benchmarking, the cookie jar is invalidated,
-      # which invalidates all cacheable redirects.
-      raw_headers = request.GetRawResponseHeaders()
-      cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
-      # NoState-Prefetch would only fetch the resources, but not parse them.
-      cache_backend.DeleteStreamForKey(request.url, 2)
-    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
-    logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
+    with chrome_cache.OnlineCacheBackend(
+        cache_path, chrome_cache.CacheBackendType.Simple) as cache_backend:
+      cache_entries = set(cache_backend.ListKeys())
+      logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
+      for request in _FilterOutDataAndIncompleteRequests(
+          trace.request_track.GetEvents()):
+        # On requests having an upload data stream, such as POST requests,
+        # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key
+        # with the upload data stream's session unique identifier.
+        #
+        # It is fine not to patch these requests: when reopening Chrome, the
+        # entry cannot be reused because the upload data stream's identifier
+        # will be different.
+        #
+        # That these entries are kept in the cache after closing Chrome
+        # properly (by closing the Chrome tab, as
+        # ChromeControler.SetSlowDeath() does) is a known Chrome bug
+        # (crbug.com/610725).
+        if request.url not in cache_entries:
+          continue
+        # Chrome prunes Set-Cookie from response headers before storing them
+        # in disk cache. Also, it adds an implicit "Vary: cookie" header to
+        # all redirect response headers. Sandwich manages the cache, but
+        # between recording the cache and benchmarking, the cookie jar is
+        # invalidated, which invalidates all cacheable redirects.
+        raw_headers = request.GetRawResponseHeaders()
+        try:
+          cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
+          # NoState-Prefetch would only fetch the resources, but not parse
+          # them.
+          cache_backend.DeleteStreamForKey(request.url, 2)
+          # Sync operations to actually catch errors here.
+          cache_backend.Sync()
+        except chrome_cache.CacheBackendError as error:
+          # For some reason, sometimes cachetool can't find the key's entry
+          # when not using the online mode.
+          logging.warning('cachetool error: %s', repr(error))
+      logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
+    chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
 
 
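The try/except added here exists because a batched backend only reports failures once its queued commands are flushed, hence the explicit Sync() inside the same try block as the mutations. A rough sketch of that failure model follows; the class and its internals are invented for illustration and are not cachetool's or chrome_cache's real interface:

    class CacheBackendError(Exception):
      """Stands in for chrome_cache.CacheBackendError in this sketch."""


    class _BatchedBackendSketch(object):
      """Illustration only: mutations are queued, errors surface on Sync()."""

      def __init__(self, entry_keys):
        self._entry_keys = set(entry_keys)
        self._pending = []  # Queued (operation name, key) pairs.

      def UpdateRawResponseHeaders(self, key, raw_headers):
        self._pending.append(('update_headers', key))  # No error raised yet.

      def DeleteStreamForKey(self, key, index):
        self._pending.append(('delete_stream', key))  # No error raised yet.

      def Sync(self):
        # The queued operations only run here, so a missing entry raises at
        # Sync() time rather than at the call sites above. Wrapping the whole
        # sequence in one try block therefore catches errors from all steps.
        while self._pending:
          operation, key = self._pending.pop(0)
          if key not in self._entry_keys:
            raise CacheBackendError('%s: no entry for %s' % (operation, key))
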
@@ -176,8 +186,8 @@
 def _DiscoverRequests(dependencies_lens, subresource_discoverer):
   trace = dependencies_lens.loading_trace
   first_resource_request = trace.request_track.GetFirstResourceRequest()
 
   if subresource_discoverer == Discoverer.EmptyCache:
     requests = []
   elif subresource_discoverer == Discoverer.FullCache:
     requests = dependencies_lens.loading_trace.request_track.GetEvents()
(...skipping 212 matching lines...)
@@ -396,22 +406,22 @@
         {URL of all requests: encoded_data_length},
       'effective_post_requests': [URLs of POST requests],
       'expected_cached_resources': [URLs of resources expected to be cached],
       'successfully_cached': [URLs of cached sub-resources]
     }
   """
   # TODO(gabadie): What's the best way of propagating errors happening in here?
   logging.info('lists cached urls from %s' % cache_archive_path)
   with common_util.TemporaryDirectory() as cache_directory:
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
-    cache_keys = set(
-        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
+    cache_keys = set(chrome_cache.CacheBackend(
+        cache_directory, chrome_cache.CacheBackendType.Simple).ListKeys())
     trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
     effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
     effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
     effective_encoded_data_lengths = {}
     for request in _FilterOutDataAndIncompleteRequests(
         trace.request_track.GetEvents()):
       if request.from_disk_cache or request.served_from_cache:
         # At cache archive creation time, a request might be loaded several
         # times; skip it here to avoid request.encoded_data_length == 0 when
         # it was served from the cache.
         continue
(...skipping 280 matching lines...)
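For illustration, a hypothetical value of the dict documented in the docstring above. The URLs and byte counts are invented, and the name of the first key is inferred from the effective_encoded_data_lengths variable in the code, since the docstring line naming it falls in the skipped region:

    cache_validation_result = {
        # Inferred key name; maps every request URL to its encoded data length.
        'effective_encoded_data_lengths': {
            'http://example.com/': 1204,
            'http://example.com/app.js': 5130,
        },
        'effective_post_requests': ['http://example.com/api/log'],
        'expected_cached_resources': [
            'http://example.com/',
            'http://example.com/app.js',
        ],
        'successfully_cached': ['http://example.com/app.js'],
    }
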
@@ -698,10 +708,10 @@
       run_metrics_list = _ProcessRunOutputDir(
           cache_validation_result, benchmark_setup, RunBenchmark.path)
       with open(ProcessRunOutputDir.path, 'w') as csv_file:
         writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
             sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
         for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
 
     self._common_builder.default_final_tasks.append(ProcessRunOutputDir)
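The CSV emission above is plain csv.DictWriter usage: per-benchmark columns are prepended to the common metric columns, a header row is written, then one row per trace. A self-contained sketch of the same pattern, with column names and row values invented for the example:

    import csv

    additional_column_names = ['url', 'repeat_id']            # invented
    common_csv_column_names = ['total_load_ms', 'onload_ms']  # invented

    run_metrics_list = [
        {'url': 'http://example.com/', 'repeat_id': 0,
         'total_load_ms': 1320, 'onload_ms': 860},
    ]

    with open('run_metrics.csv', 'w') as csv_file:
      writer = csv.DictWriter(
          csv_file,
          fieldnames=(additional_column_names + common_csv_column_names))
      writer.writeheader()  # Emits: url,repeat_id,total_load_ms,onload_ms
      for trace_metrics in run_metrics_list:
        writer.writerow(trace_metrics)
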
