Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import json | |
| 7 import os | |
| 6 | 8 |
| 9 import chrome_cache | |
| 10 import common_util | |
| 7 from loading_trace import LoadingTrace | 11 from loading_trace import LoadingTrace |
| 8 from prefetch_view import PrefetchSimulationView | 12 from prefetch_view import PrefetchSimulationView |
| 9 from request_dependencies_lens import RequestDependencyLens | 13 from request_dependencies_lens import RequestDependencyLens |
| 10 from user_satisfied_lens import FirstContentfulPaintLens | 14 import sandwich_runner |
| 11 import wpr_backend | 15 import wpr_backend |
| 12 | 16 |
| 13 | 17 |
| 18 # Do not prefetch anything. | |
| 19 EMPTY_CACHE_DISCOVERER = 'empty-cache' | |
| 20 | |
| 21 # Prefetches everything to load fully from cache (impossible in practice). | |
| 22 FULL_CACHE_DISCOVERER = 'full-cache' | |
| 23 | |
| 14 # Prefetches the first resource following the redirection chain. | 24 # Prefetches the first resource following the redirection chain. |
| 15 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' | 25 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' |
| 16 | 26 |
| 17 # All resources which are fetched from the main document and their redirections. | 27 # All resources which are fetched from the main document and their redirections. |
| 18 PARSER_DISCOVERER = 'parser' | 28 PARSER_DISCOVERER = 'parser' |
| 19 | 29 |
| 20 # Simulation of HTMLPreloadScanner on the main document and their redirections. | 30 # Simulation of HTMLPreloadScanner on the main document and their redirections. |
| 21 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' | 31 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' |
| 22 | 32 |
| 23 SUBRESOURCE_DISCOVERERS = set([ | 33 SUBRESOURCE_DISCOVERERS = set([ |
| 34 EMPTY_CACHE_DISCOVERER, | |
| 35 FULL_CACHE_DISCOVERER, | |
| 24 REDIRECTED_MAIN_DISCOVERER, | 36 REDIRECTED_MAIN_DISCOVERER, |
| 25 PARSER_DISCOVERER, | 37 PARSER_DISCOVERER, |
| 26 HTML_PRELOAD_SCANNER_DISCOVERER | 38 HTML_PRELOAD_SCANNER_DISCOVERER |
| 27 ]) | 39 ]) |
| 28 | 40 |
| 29 | 41 |
| 30 def PatchWpr(wpr_archive_path): | 42 def PatchWpr(wpr_archive_path): |
| 31 """Patches a WPR archive to get all resources into the HTTP cache and avoid | 43 """Patches a WPR archive to get all resources into the HTTP cache and avoid |
| 32 invalidation and revalidations. | 44 invalidation and revalidations. |
| 33 | 45 |
| (...skipping 44 matching lines...) | |
| 78 'unknown prefetch simulation {}'.format(subresource_discoverer) | 90 'unknown prefetch simulation {}'.format(subresource_discoverer) |
| 79 | 91 |
| 80 # Load trace and related infos. | 92 # Load trace and related infos. |
| 81 logging.info('loading %s' % loading_trace_path) | 93 logging.info('loading %s' % loading_trace_path) |
| 82 trace = LoadingTrace.FromJsonFile(loading_trace_path) | 94 trace = LoadingTrace.FromJsonFile(loading_trace_path) |
| 83 dependencies_lens = RequestDependencyLens(trace) | 95 dependencies_lens = RequestDependencyLens(trace) |
| 84 first_resource_request = trace.request_track.GetFirstResourceRequest() | 96 first_resource_request = trace.request_track.GetFirstResourceRequest() |
| 85 | 97 |
| 86 # Build the list of discovered requests according to the desired simulation. | 98 # Build the list of discovered requests according to the desired simulation. |
| 87 discovered_requests = [] | 99 discovered_requests = [] |
| 88 if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | 100 if subresource_discoverer == EMPTY_CACHE_DISCOVERER: |
| 101 pass | |
| 102 elif subresource_discoverer == FULL_CACHE_DISCOVERER: | |
| 103 discovered_requests = trace.request_track.GetEvents() | |
| 104 elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | |
| 89 discovered_requests = \ | 105 discovered_requests = \ |
| 90 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] | 106 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] |
| 91 elif subresource_discoverer == PARSER_DISCOVERER: | 107 elif subresource_discoverer == PARSER_DISCOVERER: |
| 92 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 108 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
| 93 first_resource_request, dependencies_lens) | 109 first_resource_request, dependencies_lens) |
| 94 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 110 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
| 95 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 111 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
| 96 first_resource_request, dependencies_lens, trace) | 112 first_resource_request, dependencies_lens, trace) |
| 97 else: | 113 else: |
| 98 assert False | 114 assert False |
| 99 | 115 |
| 100 # Prune out data:// requests. | 116 # Prune out data:// requests. |
| 101 whitelisted_urls = set() | 117 whitelisted_urls = set() |
| 102 logging.info('white-listing %s' % first_resource_request.url) | 118 logging.info('white-listing %s' % first_resource_request.url) |
| 103 whitelisted_urls.add(first_resource_request.url) | |
| 104 for request in discovered_requests: | 119 for request in discovered_requests: |
| 105 # Work-around where the protocol may be none for an unclear reason yet. | 120 # Work-around where the protocol may be none for an unclear reason yet. |
| 106 # TODO(gabadie): Follow up on this with Clovis guys and possibly remove | 121 # TODO(gabadie): Follow up on this with Clovis guys and possibly remove |
| 107 # this work-around. | 122 # this work-around. |
| 108 if not request.protocol: | 123 if not request.protocol: |
| 109 logging.warning('ignoring %s (no protocol)' % request.url) | 124 logging.warning('ignoring %s (no protocol)' % request.url) |
| 110 continue | 125 continue |
| 111 # Ignore data protocols. | 126 # Ignore data protocols. |
| 112 if not request.protocol.startswith('http'): | 127 if not request.protocol.startswith('http'): |
| 113 continue | 128 continue |
| 114 logging.info('white-listing %s' % request.url) | 129 logging.info('white-listing %s' % request.url) |
| 115 whitelisted_urls.add(request.url) | 130 whitelisted_urls.add(request.url) |
| 116 return whitelisted_urls | 131 return whitelisted_urls |
| 132 | |
| 133 | |
| 134 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | |
| 135 """Compare URL sets and log the diffs. | |
| 136 | |
| 137 Args: | |
| 138 ref_url_set: Set of reference urls. | |
| 139 url_set: Set of urls to compare to the reference. | |
| 140 url_set_name: The set name for logging purposes. | |
| 141 """ | |
| 142 assert type(ref_url_set) == set | |
| 143 assert type(url_set) == set | |
| 144 if ref_url_set == url_set: | |
| 145 logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name)) | |
| 146 return | |
| 147 logging.error(' %s are not matching.' % url_set_name) | |
| 148 logging.error(' List of missing resources:') | |
| 149 for url in ref_url_set.difference(url_set): | |
| 150 logging.error('- ' + url) | |
| 151 logging.error(' List of unexpected resources:') | |
| 152 for url in url_set.difference(ref_url_set): | |
| 153 logging.error('+ ' + url) | |
| 154 | |
| 155 | |
| 156 class _RequestOutcome: | |
| 157 All, ServedFromCache, NotServedFromCache = range(3) | |
| 158 | |
| 159 | |
| 160 def _ListUrlRequests(trace, request_kind): | |
| 161 """Lists requested URLs from a trace. | |
| 162 | |
| 163 Args: | |
| 164 trace: The trace. | |
| pasko (2016/04/25 13:29:06): trace: (LoadingTrace) loading trace. | |
| gabadie (2016/04/27 08:32:16): Done. | |
| 165 request_kind: _RequestOutcome indicating the subset of requests to output. | |
| 166 | |
| 167 Returns: | |
| 168 set([str]) | |
| 169 """ | |
| 170 urls = set() | |
| 171 for request_event in trace.request_track.GetEvents(): | |
| 172 if request_event.protocol == None: | |
| 173 continue | |
| 174 if request_event.protocol.startswith('data'): | |
| 175 continue | |
| 176 assert request_event.protocol.startswith('http') | |
| pasko (2016/04/25 13:29:06): I can remember you preferring to avoid asserts on... | |
| gabadie (2016/04/27 08:32:16): Yes but here it is not an user input related check... | |
| pasko (2016/04/27 08:50:58): I disagree, it depends on what a user puts into th... | |
| 177 if (request_kind == _RequestOutcome.ServedFromCache and | |
| 178 request_event.from_disk_cache): | |
| 179 urls.add(request_event.url) | |
| 180 elif (request_kind == _RequestOutcome.NotServedFromCache and | |
| 181 not request_event.from_disk_cache): | |
| 182 urls.add(request_event.url) | |
| 183 elif request_kind == _RequestOutcome.All: | |
| 184 urls.add(request_event.url) | |
| 185 return urls | |
| 186 | |
| 187 | |
| 188 def VerifyBenchmarkOutputDirectory(benchmark_setup_path, | |
| 189 benchmark_output_directory_path): | |
| 190 """Verifies that all run inside the run_output_directory worked as expected. | |
| 191 | |
| 192 Args: | |
| 193 benchmark_setup_path: Path of the JSON of the benchmark setup. | |
| 194 benchmark_output_directory_path: Path of the benchmark output directory to | |
| 195 verify. | |
| 196 """ | |
| 197 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
| 198 benchmark_setup = json.load(open(benchmark_setup_path)) | |
| 199 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
| 200 url_resources = set(benchmark_setup['url_resources']) | |
| 201 | |
| 202 # Verify requests from traces. | |
| 203 run_id = -1 | |
| 204 while True: | |
| 205 run_id += 1 | |
| 206 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | |
| 207 if not os.path.isdir(run_path): | |
| 208 break | |
| 209 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | |
| 210 if not os.path.isfile(trace_path): | |
| 211 logging.error('missing trace %s' % trace_path) | |
| 212 continue | |
| 213 trace = LoadingTrace.FromJsonFile(trace_path) | |
| 214 logging.info('verifying %s from %s' % (trace.url, trace_path)) | |
| 215 _PrintUrlSetComparison(url_resources, | |
| 216 _ListUrlRequests(trace, _RequestOutcome.All), 'All resources') | |
| 217 _PrintUrlSetComparison(url_resources.intersection(cache_whitelist), | |
| 218 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache), | |
| 219 'Cached resources') | |
| 220 _PrintUrlSetComparison(url_resources.difference(cache_whitelist), | |
| 221 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache), | |
| 222 'Non cached resources') | |
| 223 | |
| 224 | |
| 225 def ReadSubresourceMapFromBenchmarkOutput(benchmark_output_directory_path): | |
| 226 """Extracts a map URL-to-subresources for each navigation in benchmark | |
| 227 directory. | |
| 228 | |
| 229 Args: | |
| 230 benchmark_output_directory_path: Path of the benchmark output directory to | |
| 231 verify. | |
| 232 | |
| 233 Returns: | |
| 234 {url -> [URLs of sub-resources]} | |
| 235 """ | |
| 236 url_subresources = {} | |
| 237 run_id = -1 | |
| 238 while True: | |
| 239 run_id += 1 | |
| 240 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | |
| 241 if not os.path.isdir(run_path): | |
| 242 break | |
| 243 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | |
| 244 if not os.path.isfile(trace_path): | |
| 245 continue | |
| 246 trace = LoadingTrace.FromJsonFile(trace_path) | |
| 247 if trace.url in url_subresources: | |
| 248 continue | |
| 249 logging.info('lists resources of %s from %s' % (trace.url, trace_path)) | |
| 250 urls_set = set() | |
| 251 for request_event in trace.request_track.GetEvents(): | |
| 252 if not request_event.protocol.startswith('http'): | |
| 253 continue | |
| 254 if request_event.url not in urls_set: | |
| 255 logging.info(' %s' % request_event.url) | |
| 256 urls_set.add(request_event.url) | |
| 257 url_subresources[trace.url] = [url for url in urls_set] | |
| 258 return url_subresources | |
| 259 | |
| 260 | |
| 261 def ValidateCacheArchiveContent(ref_urls, cache_archive_path): | |
| 262 """Validates a cache archive content. | |
| 263 | |
| 264 Args: | |
| 265 ref_urls: Reference list of urls. | |
| 266 cache_archive_path: Cache archive's path to validate. | |
| 267 """ | |
| 268 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
| 269 logging.info('lists cached urls from %s' % cache_archive_path) | |
| 270 with common_util.TemporaryDirectory() as cache_directory: | |
| 271 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | |
| 272 cached_urls = \ | |
| 273 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() | |
| 274 _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources') | |
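
Since the TODOs above note that mismatches are only surfaced through logging, a minimal driver for the new verification helpers might look like the sketch below. It is only an illustration: the import name `sandwich_misc` and the three paths are assumptions not confirmed by this diff, and the setup JSON is expected to carry the `cache_whitelist` and `url_resources` keys that `VerifyBenchmarkOutputDirectory` reads.

```python
# Usage sketch for the verification helpers reviewed above. Everything here
# is illustrative: the module name `sandwich_misc` and the three paths are
# assumptions, not taken from this CL.
import json
import logging

import sandwich_misc  # assumed import name for the file under review

BENCHMARK_SETUP_PATH = 'benchmark_setup.json'  # hypothetical path
BENCHMARK_OUTPUT_DIR = 'benchmark_output'      # hypothetical path
CACHE_ARCHIVE_PATH = 'cache.zip'               # hypothetical path

logging.basicConfig(level=logging.INFO)

# Compares the requests recorded in each run's trace against the expected
# resource and whitelist sets; mismatches are reported via logging.error.
sandwich_misc.VerifyBenchmarkOutputDirectory(
    BENCHMARK_SETUP_PATH, BENCHMARK_OUTPUT_DIR)

# Cross-checks the cache archive against the whitelist stored in the
# benchmark setup JSON (the 'cache_whitelist' key read above).
with open(BENCHMARK_SETUP_PATH) as setup_file:
  benchmark_setup = json.load(setup_file)
sandwich_misc.ValidateCacheArchiveContent(
    benchmark_setup['cache_whitelist'], CACHE_ARCHIVE_PATH)

# Lists the sub-resources discovered for each navigated URL.
subresources = sandwich_misc.ReadSubresourceMapFromBenchmarkOutput(
    BENCHMARK_OUTPUT_DIR)
for url, resources in sorted(subresources.items()):
  logging.info('%s -> %d sub-resources', url, len(resources))
```

As the TODO(gabadie) comments point out, these helpers report set differences through `logging.error` rather than raising, so a caller that needs a pass/fail signal would have to inspect the logs or extend the functions to return a status.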