| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import json |
| 7 import os |
| 6 | 8 |
| 9 import chrome_cache |
| 10 import common_util |
| 7 from loading_trace import LoadingTrace | 11 from loading_trace import LoadingTrace |
| 8 from prefetch_view import PrefetchSimulationView | 12 from prefetch_view import PrefetchSimulationView |
| 9 from request_dependencies_lens import RequestDependencyLens | 13 from request_dependencies_lens import RequestDependencyLens |
| 10 from user_satisfied_lens import FirstContentfulPaintLens | |
| 11 import wpr_backend | 14 import wpr_backend |
| 12 | 15 |
| 13 | 16 |
| 14 # Prefetches the first resource following the redirection chain. | 17 # Prefetches the first resource following the redirection chain. |
| 15 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' | 18 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' |
| 16 | 19 |
| 17 # All resources fetched from the main document, plus their redirections. | 20 # All resources fetched from the main document, plus their redirections. |
| 18 PARSER_DISCOVERER = 'parser' | 21 PARSER_DISCOVERER = 'parser' |
| 19 | 22 |
| 20 # Simulation of HTMLPreloadScanner on the main document and its redirections. | 23 # Simulation of HTMLPreloadScanner on the main document and its redirections. |
| (...skipping 86 matching lines...) |
| 107 # this work-around. | 110 # this work-around. |
| 108 if not request.protocol: | 111 if not request.protocol: |
| 109 logging.warning('ignoring %s (no protocol)' % request.url) | 112 logging.warning('ignoring %s (no protocol)' % request.url) |
| 110 continue | 113 continue |
| 111 # Ignore data protocols. | 114 # Ignore data protocols. |
| 112 if not request.protocol.startswith('http'): | 115 if not request.protocol.startswith('http'): |
| 113 continue | 116 continue |
| 114 logging.info('white-listing %s' % request.url) | 117 logging.info('white-listing %s' % request.url) |
| 115 whitelisted_urls.add(request.url) | 118 whitelisted_urls.add(request.url) |
| 116 return whitelisted_urls | 119 return whitelisted_urls |
| 120 |
| 121 |
| 122 def CompareUrlSet(ref_url_set, url_set, url_set_name, debug_hint='Good luck!'): |
| 123 """Compare URL sets |
| 124 |
| 125 Args: |
| 126 ref_url_set: Set of reference urls. |
| 127 url_set: Set of urls to compare to the reference. |
| 128 url_set_name: The set name for logging purposes. |
| 129 debug_hint: A hint to aid debugging in case the sets are |
| 130 different. |
| 131 """ |
| 132 assert isinstance(ref_url_set, set) |
| 133 assert isinstance(url_set, set) |
| 134 if ref_url_set == url_set: |
| 135 logging.info(' %d %s match.' % (len(ref_url_set), url_set_name)) |
| 136 return |
| 137 logging.error(' %s do not match.' % url_set_name) |
| 138 logging.error(' Hint: ' + debug_hint) |
| 139 logging.error(' List of missing resources:') |
| 140 for url in ref_url_set.difference(url_set): |
| 141 logging.error('- ' + url) |
| 142 logging.error(' List of unexpected resources:') |
| 143 for url in url_set.difference(ref_url_set): |
| 144 logging.error('+ ' + url) |
| 145 |
| 146 |
| 147 def _ListUrlRequests(trace, from_cache=None): |
| 148 urls = set() |
| 149 for request_event in trace.request_track.GetEvents(): |
| 150 if request_event.protocol is None: |
| 151 continue |
| 152 if not request_event.protocol.startswith('http'): |
| 153 continue |
| 154 if from_cache is not None and request_event.from_disk_cache != from_cache: |
| 155 continue |
| 156 urls.add(request_event.url) |
| 157 return urls |
| 158 |
| 159 |
| 160 def VerifyBenchmarkOutputDirectory(benchmark_setup_path, |
| 161 benchmark_output_directory_path): |
| 162 """Verifies that all run inside the run_output_directory worked as expected. |
| 163 |
| 164 Args: |
| 165 benchmark_setup_path: Path of the JSON of the benchmark setup. |
| 166 benchmark_output_directory_path: Path of the benchmark output directory to |
| 167 verify. |
| 168 """ |
| 169 benchmark_setup = json.load(open(benchmark_setup_path)) |
| 170 cache_whitelist = set(benchmark_setup['cache_whitelist']) |
| 171 url_resources = set(benchmark_setup['url_resources']) |
| 172 |
| 173 # Verify requests from traces. |
| 174 run_id = -1 |
| 175 while True: |
| 176 run_id += 1 |
| 177 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) |
| 178 if not os.path.isdir(run_path): |
| 179 break |
| 180 trace_path = os.path.join(run_path, 'trace.json') |
| 181 if not os.path.isfile(trace_path): |
| 182 logging.error('missing trace %s' % trace_path) |
| 183 continue |
| 184 trace = LoadingTrace.FromJsonFile(trace_path) |
| 185 logging.info('verifying %s from %s' % (trace.url, trace_path)) |
| 186 CompareUrlSet(url_resources, _ListUrlRequests(trace), 'All resources', |
| 187 'You may have an issue with an AJAX request.') |
| 188 CompareUrlSet(url_resources.intersection(cache_whitelist), |
| 189 _ListUrlRequests(trace, True), 'Cached resources', |
| 190 'The WPR archive patcher may have an invalidation issue.') |
| 191 CompareUrlSet(url_resources.difference(cache_whitelist), |
| 192 _ListUrlRequests(trace, False), 'Non-cached resources') |
| 193 |
| 194 |
| 195 def ListResourcesUrls(benchmark_output_directory_path): |
| 196 """Lists all requested urls per navigated urls |
| 197 |
| 198 Args: |
| 199 benchmark_output_directory_path: Path of the benchmark output directory to |
| 200 verify. |
| 201 |
| 202 Returns: |
| 203 {url -> [urls of sub-resources]} |
| 204 """ |
| 205 url_subresources = {} |
| 206 run_id = -1 |
| 207 while True: |
| 208 run_id += 1 |
| 209 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) |
| 210 if not os.path.isdir(run_path): |
| 211 break |
| 212 trace_path = os.path.join(run_path, 'trace.json') |
| 213 if not os.path.isfile(trace_path): |
| 214 continue |
| 215 trace = LoadingTrace.FromJsonFile(trace_path) |
| 216 if trace.url in url_subresources: |
| 217 continue |
| 218 logging.info('listing resources of %s from %s' % (trace.url, trace_path)) |
| 219 urls_set = set() |
| 220 for request_event in trace.request_track.GetEvents(): |
| 221 if not (request_event.protocol and request_event.protocol.startswith('http')): |
| 222 continue |
| 223 if request_event.url not in urls_set: |
| 224 logging.info(' %s' % request_event.url) |
| 225 urls_set.add(request_event.url) |
| 226 url_subresources[trace.url] = list(urls_set) |
| 227 return url_subresources |
| 228 |
| 229 |
| 230 def ValidateCacheArchiveContent(ref_urls, cache_archive_path): |
| 231 """Validates a cache archive content. |
| 232 |
| 233 Args: |
| 234 ref_urls: Reference list of urls. |
| 235 cache_archive_path: Cache archive's path to validate. |
| 236 """ |
| 237 logging.info('listing cached urls from %s' % cache_archive_path) |
| 238 with common_util.TemporaryDirectory() as cache_directory: |
| 239 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| 240 cached_urls = chrome_cache.CacheBackend( |
| 241 cache_directory, 'simple').ListKeys() |
| 242 CompareUrlSet(set(ref_urls), set(cached_urls), 'cached resources', |
| 243 debug_hint='Looks like a response header needs to be patched.') |