| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import json |
| 7 import os |
| 6 | 8 |
| 9 import chrome_cache |
| 10 import common_util |
| 7 from loading_trace import LoadingTrace | 11 from loading_trace import LoadingTrace |
| 8 from prefetch_view import PrefetchSimulationView | 12 from prefetch_view import PrefetchSimulationView |
| 9 from request_dependencies_lens import RequestDependencyLens | 13 from request_dependencies_lens import RequestDependencyLens |
| 10 from user_satisfied_lens import FirstContentfulPaintLens | 14 import sandwich_runner |
| 11 import wpr_backend | 15 import wpr_backend |
| 12 | 16 |
| 13 | 17 |
| 18 # Do not prefetch anything. |
| 19 EMPTY_CACHE_DISCOVERER = 'empty-cache' |
| 20 |
| 21 # Prefetches everything to load fully from cache (impossible in practice). |
| 22 FULL_CACHE_DISCOVERER = 'full-cache' |
| 23 |
| 14 # Prefetches the first resource following the redirection chain. | 24 # Prefetches the first resource following the redirection chain. |
| 15 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' | 25 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' |
| 16 | 26 |
| 17 # All resources fetched from the main document, plus their redirections. | 27 # All resources fetched from the main document, plus their redirections. |
| 18 PARSER_DISCOVERER = 'parser' | 28 PARSER_DISCOVERER = 'parser' |
| 19 | 29 |
| 21 # Simulation of the HTMLPreloadScanner on the main document and its redirections. | 31 # Simulation of the HTMLPreloadScanner on the main document and its redirections. |
| 21 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' | 31 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' |
| 22 | 32 |
| 23 SUBRESOURCE_DISCOVERERS = set([ | 33 SUBRESOURCE_DISCOVERERS = set([ |
| 34 EMPTY_CACHE_DISCOVERER, |
| 35 FULL_CACHE_DISCOVERER, |
| 24 REDIRECTED_MAIN_DISCOVERER, | 36 REDIRECTED_MAIN_DISCOVERER, |
| 25 PARSER_DISCOVERER, | 37 PARSER_DISCOVERER, |
| 26 HTML_PRELOAD_SCANNER_DISCOVERER | 38 HTML_PRELOAD_SCANNER_DISCOVERER |
| 27 ]) | 39 ]) |
| 28 | 40 |
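Each discoverer name selects one prefetch simulation. As a minimal sketch of how
a caller might sweep every simulation, assuming a hypothetical ExtractWhitelist
wrapper around the extraction function whose body appears further down:

    # Hypothetical driver: run every prefetch simulation against one trace.
    # ExtractWhitelist is an assumed name, not part of this CL.
    for discoverer in sorted(SUBRESOURCE_DISCOVERERS):
      whitelist = ExtractWhitelist('loading_trace.json', discoverer)
      print('%s: %d white-listed URLs' % (discoverer, len(whitelist)))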
| 29 | 41 |
| 30 def PatchWpr(wpr_archive_path): | 42 def PatchWpr(wpr_archive_path): |
| 31 """Patches a WPR archive to get all resources into the HTTP cache and avoid | 43 """Patches a WPR archive to get all resources into the HTTP cache and avoid |
| 32 cache invalidations and revalidations. | 44 cache invalidations and revalidations. |
| 33 | 45 |
| (...skipping 44 matching lines...) |
| 78 'unknown prefetch simulation {}'.format(subresource_discoverer) | 90 'unknown prefetch simulation {}'.format(subresource_discoverer) |
| 79 | 91 |
| 80 # Load trace and related infos. | 92 # Load trace and related infos. |
| 81 logging.info('loading %s' % loading_trace_path) | 93 logging.info('loading %s' % loading_trace_path) |
| 82 trace = LoadingTrace.FromJsonFile(loading_trace_path) | 94 trace = LoadingTrace.FromJsonFile(loading_trace_path) |
| 83 dependencies_lens = RequestDependencyLens(trace) | 95 dependencies_lens = RequestDependencyLens(trace) |
| 84 first_resource_request = trace.request_track.GetFirstResourceRequest() | 96 first_resource_request = trace.request_track.GetFirstResourceRequest() |
| 85 | 97 |
| 86 # Build the list of discovered requests according to the desired simulation. | 98 # Build the list of discovered requests according to the desired simulation. |
| 87 discovered_requests = [] | 99 discovered_requests = [] |
| 88 if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | 100 if subresource_discoverer == EMPTY_CACHE_DISCOVERER: |
| 101 pass |
| 102 elif subresource_discoverer == FULL_CACHE_DISCOVERER: |
| 103 discovered_requests = trace.request_track.GetEvents() |
| 104 elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: |
| 89 discovered_requests = \ | 105 discovered_requests = \ |
| 90 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] | 106 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] |
| 91 elif subresource_discoverer == PARSER_DISCOVERER: | 107 elif subresource_discoverer == PARSER_DISCOVERER: |
| 92 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 108 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
| 93 first_resource_request, dependencies_lens) | 109 first_resource_request, dependencies_lens) |
| 94 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 110 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
| 95 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 111 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
| 96 first_resource_request, dependencies_lens, trace) | 112 first_resource_request, dependencies_lens, trace) |
| 97 else: | 113 else: |
| 98 assert False | 114 assert False |
| 99 | 115 |
| 100 # Prune out data:// requests. | 116 # Prune out data:// requests. |
| 101 whitelisted_urls = set() | 117 whitelisted_urls = set() |
| 102 logging.info('white-listing %s' % first_resource_request.url) | 118 logging.info('white-listing %s' % first_resource_request.url) |
| 103 whitelisted_urls.add(first_resource_request.url) | |
| 104 for request in discovered_requests: | 119 for request in discovered_requests: |
| 105 # Work-around for requests whose protocol is None for a still-unclear reason. | 120 # Work-around for requests whose protocol is None for a still-unclear reason. |
| 106 # TODO(gabadie): Follow up on this with the Clovis team and possibly remove | 121 # TODO(gabadie): Follow up on this with the Clovis team and possibly remove |
| 107 # this work-around. | 122 # this work-around. |
| 108 if not request.protocol: | 123 if not request.protocol: |
| 109 logging.warning('ignoring %s (no protocol)' % request.url) | 124 logging.warning('ignoring %s (no protocol)' % request.url) |
| 110 continue | 125 continue |
| 111 # Ignore data protocols. | 126 # Ignore data protocols. |
| 112 if not request.protocol.startswith('http'): | 127 if not request.protocol.startswith('http'): |
| 113 continue | 128 continue |
| 114 logging.info('white-listing %s' % request.url) | 129 logging.info('white-listing %s' % request.url) |
| 115 whitelisted_urls.add(request.url) | 130 whitelisted_urls.add(request.url) |
| 116 return whitelisted_urls | 131 return whitelisted_urls |
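The pruning loop above keeps only requests carrying an http(s) protocol. A
self-contained sketch of the same filter, using a request type invented purely
for illustration:

    import collections

    # FakeRequest mimics the two attributes the filter reads.
    FakeRequest = collections.namedtuple('FakeRequest', ['url', 'protocol'])
    requests = [FakeRequest('http://a.com/', 'http/1.1'),
                FakeRequest('data:text/html,hello', 'data'),
                FakeRequest('http://b.com/', None)]
    kept = set(r.url for r in requests
               if r.protocol and r.protocol.startswith('http'))
    assert kept == set(['http://a.com/'])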
| 132 |
| 133 |
| 134 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
| 135 """Compare URL sets and log the diffs. |
| 136 |
| 137 Args: |
| 138 ref_url_set: Set of reference urls. |
| 139 url_set: Set of urls to compare to the reference. |
| 140 url_set_name: The set name for logging purposes. |
| 141 """ |
| 142 assert isinstance(ref_url_set, set) |
| 143 assert isinstance(url_set, set) |
| 144 if ref_url_set == url_set: |
| 145 logging.info(' %d %s match.' % (len(ref_url_set), url_set_name)) |
| 146 return |
| 147 logging.error(' %s do not match.' % url_set_name) |
| 148 logging.error(' List of missing resources:') |
| 149 for url in ref_url_set.difference(url_set): |
| 150 logging.error('- ' + url) |
| 151 logging.error(' List of unexpected resources:') |
| 152 for url in url_set.difference(ref_url_set): |
| 153 logging.error('+ ' + url) |
| 154 |
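_PrintUrlSetComparison only logs; it never raises, so one mismatching run does
not abort the whole verification. A small illustration with made-up URLs:

    ref = set(['http://e.com/', 'http://e.com/app.js'])
    # Logs a mismatch for 'All resources', then lists http://e.com/app.js
    # under the missing resources with a '-' prefix.
    _PrintUrlSetComparison(ref, set(['http://e.com/']), 'All resources')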
| 155 |
| 156 class _RequestOutcome: |
| 157 All, ServedFromCache, NotServedFromCache = range(3) |
| 158 |
| 159 |
| 160 def _ListUrlRequests(trace, request_kind): |
| 161 """Lists requested URLs from a trace. |
| 162 |
| 163 Args: |
| 164 trace: (LoadingTrace) loading trace. |
| 165 request_kind: _RequestOutcome indicating the subset of requests to output. |
| 166 |
| 167 Returns: |
| 168 set([str]) |
| 169 """ |
| 170 urls = set() |
| 171 for request_event in trace.request_track.GetEvents(): |
| 172 if request_event.protocol is None: |
| 173 continue |
| 174 if request_event.protocol.startswith('data'): |
| 175 continue |
| 176 if not request_event.protocol.startswith('http'): |
| 177 raise RuntimeError('Unknown protocol {}'.format(request_event.protocol)) |
| 178 if (request_kind == _RequestOutcome.ServedFromCache and |
| 179 request_event.from_disk_cache): |
| 180 urls.add(request_event.url) |
| 181 elif (request_kind == _RequestOutcome.NotServedFromCache and |
| 182 not request_event.from_disk_cache): |
| 183 urls.add(request_event.url) |
| 184 elif request_kind == _RequestOutcome.All: |
| 185 urls.add(request_event.url) |
| 186 return urls |
| 187 |
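Every non-data request is either served from the disk cache or not, so the two
filtered subsets always cover the full set. A usage sketch (the trace path is
illustrative; the real file name comes from sandwich_runner.TRACE_FILENAME):

    trace = LoadingTrace.FromJsonFile('0/trace.json')  # illustrative path
    all_urls = _ListUrlRequests(trace, _RequestOutcome.All)
    cached = _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
    uncached = _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
    assert cached.union(uncached) == all_urls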
| 188 |
| 189 def VerifyBenchmarkOutputDirectory(benchmark_setup_path, |
| 190 benchmark_output_directory_path): |
| 191 """Verifies that all run inside the run_output_directory worked as expected. |
| 192 |
| 193 Args: |
| 194 benchmark_setup_path: Path of the JSON of the benchmark setup. |
| 195 benchmark_output_directory_path: Path of the benchmark output directory to |
| 196 verify. |
| 197 """ |
| 198 # TODO(gabadie): What's the best way of propagating errors happening in here? |
| 199 benchmark_setup = json.load(open(benchmark_setup_path)) |
| 200 cache_whitelist = set(benchmark_setup['cache_whitelist']) |
| 201 url_resources = set(benchmark_setup['url_resources']) |
| 202 |
| 203 # Verify requests from traces. |
| 204 run_id = -1 |
| 205 while True: |
| 206 run_id += 1 |
| 207 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) |
| 208 if not os.path.isdir(run_path): |
| 209 break |
| 210 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) |
| 211 if not os.path.isfile(trace_path): |
| 212 logging.error('missing trace %s' % trace_path) |
| 213 continue |
| 214 trace = LoadingTrace.FromJsonFile(trace_path) |
| 215 logging.info('verifying %s from %s' % (trace.url, trace_path)) |
| 216 _PrintUrlSetComparison(url_resources, |
| 217 _ListUrlRequests(trace, _RequestOutcome.All), 'All resources') |
| 218 _PrintUrlSetComparison(url_resources.intersection(cache_whitelist), |
| 219 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache), |
| 220 'Cached resources') |
| 221 _PrintUrlSetComparison(url_resources.difference(cache_whitelist), |
| 222 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache), |
| 223 'Non cached resources') |
| 224 |
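The verifier expects run sub-directories named 0, 1, 2, ... under the output
directory, each holding a trace file, and a setup JSON carrying at least the
two keys read above. A hypothetical invocation:

    # benchmark_setup.json is assumed to look like:
    #   {"cache_whitelist": ["http://e.com/app.js"],
    #    "url_resources": ["http://e.com/", "http://e.com/app.js"]}
    VerifyBenchmarkOutputDirectory('benchmark_setup.json', 'output')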
| 225 |
| 226 def ReadSubresourceMapFromBenchmarkOutput(benchmark_output_directory_path): |
| 227 """Extracts a map URL-to-subresources for each navigation in benchmark |
| 228 directory. |
| 229 |
| 230 Args: |
| 231 benchmark_output_directory_path: Path of the benchmark output directory to |
| 232 verify. |
| 233 |
| 234 Returns: |
| 235 {url -> [URLs of sub-resources]} |
| 236 """ |
| 237 url_subresources = {} |
| 238 run_id = -1 |
| 239 while True: |
| 240 run_id += 1 |
| 241 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) |
| 242 if not os.path.isdir(run_path): |
| 243 break |
| 244 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) |
| 245 if not os.path.isfile(trace_path): |
| 246 continue |
| 247 trace = LoadingTrace.FromJsonFile(trace_path) |
| 248 if trace.url in url_subresources: |
| 249 continue |
| 250 logging.info('listing resources of %s from %s' % (trace.url, trace_path)) |
| 251 urls_set = set() |
| 252 for request_event in trace.request_track.GetEvents(): |
| 253 if not (request_event.protocol or '').startswith('http'): |
| 254 continue |
| 255 if request_event.url not in urls_set: |
| 256 logging.info(' %s' % request_event.url) |
| 257 urls_set.add(request_event.url) |
| 258 url_subresources[trace.url] = list(urls_set) |
| 259 return url_subresources |
| 260 |
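A usage sketch of the returned map ('output' is an illustrative directory path):

    subresource_map = ReadSubresourceMapFromBenchmarkOutput('output')
    for url, subresources in subresource_map.items():
      print('%s fetches %d distinct subresources' % (url, len(subresources)))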
| 261 |
| 262 def ValidateCacheArchiveContent(ref_urls, cache_archive_path): |
| 263 """Validates a cache archive content. |
| 264 |
| 265 Args: |
| 266 ref_urls: Reference list of urls. |
| 267 cache_archive_path: Cache archive's path to validate. |
| 268 """ |
| 269 # TODO(gabadie): What's the best way of propagating errors happening in here? |
| 270 logging.info('listing cached urls from %s' % cache_archive_path) |
| 271 with common_util.TemporaryDirectory() as cache_directory: |
| 272 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| 273 cached_urls = \ |
| 274 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() |
| 275 _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources') |
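A hypothetical end-to-end check, reusing the subresource map from the previous
sketch to verify that a generated cache archive holds exactly the resources of
one page ('cache.zip' is an assumed path, not named anywhere in this CL):

    ValidateCacheArchiveContent(subresource_map['http://e.com/'], 'cache.zip')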