| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 """ |
| 6 Implements a task builder for benchmarking effects of NoState Prefetch. |
| 7 Noticeable steps of the task pipeline: |
| 8 * Save a WPR archive |
| 9 * Process the WPR archive to make all resources cacheable |
| 10 * Process cache archive to patch response headers back to their original |
| 11 values. |
| 12 * Find out which resources are discoverable by NoState Prefetch |
| 13 (HTMLPreloadScanner) |
| 14 * Load pages with empty/full/prefetched cache |
| 15 * Extract most important metrics to a CSV |
| 16 """ |
| 17 |
| 5 import csv | 18 import csv |
| 6 import logging | 19 import logging |
| 7 import json | 20 import json |
| 8 import os | 21 import os |
| 9 import re | 22 import re |
| 10 import shutil | 23 import shutil |
| 11 from urlparse import urlparse | 24 from urlparse import urlparse |
| 12 | 25 |
| 13 import chrome_cache | 26 import chrome_cache |
| 14 import common_util | 27 import common_util |
| (...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 191 elif subresource_discoverer == PARSER_DISCOVERER: | 204 elif subresource_discoverer == PARSER_DISCOVERER: |
| 192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 205 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
| 193 first_resource_request, dependencies_lens) | 206 first_resource_request, dependencies_lens) |
| 194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 207 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
| 195 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 208 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
| 196 first_resource_request, dependencies_lens, trace) | 209 first_resource_request, dependencies_lens, trace) |
| 197 else: | 210 else: |
| 198 assert False | 211 assert False |
| 199 | 212 |
| 200 whitelisted_urls = set() | 213 whitelisted_urls = set() |
| 201 logging.info('white-listing %s' % first_resource_request.url) | |
| 202 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): | 214 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): |
| 203 logging.info('white-listing %s' % request.url) | 215 logging.debug('white-listing %s', request.url) |
| 204 whitelisted_urls.add(request.url) | 216 whitelisted_urls.add(request.url) |
| 217 logging.info('number of white-listed resources: %d', len(whitelisted_urls)) |
| 205 return whitelisted_urls | 218 return whitelisted_urls |
| 206 | 219 |
| 207 | 220 |
| 208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | 221 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
| 209 """Compare URL sets and log the diffs. | 222 """Compare URL sets and log the diffs. |
| 210 | 223 |
| 211 Args: | 224 Args: |
| 212 ref_url_set: Set of reference urls. | 225 ref_url_set: Set of reference urls. |
| 213 url_set: Set of urls to compare to the reference. | 226 url_set: Set of urls to compare to the reference. |
| 214 url_set_name: The set name for logging purposes. | 227 url_set_name: The set name for logging purposes. |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 254 request_event.method.upper().strip() == 'POST'): | 267 request_event.method.upper().strip() == 'POST'): |
| 255 urls.add(request_event.url) | 268 urls.add(request_event.url) |
| 256 elif (request_kind == _RequestOutcome.NotServedFromCache and | 269 elif (request_kind == _RequestOutcome.NotServedFromCache and |
| 257 not request_event.from_disk_cache): | 270 not request_event.from_disk_cache): |
| 258 urls.add(request_event.url) | 271 urls.add(request_event.url) |
| 259 elif request_kind == _RequestOutcome.All: | 272 elif request_kind == _RequestOutcome.All: |
| 260 urls.add(request_event.url) | 273 urls.add(request_event.url) |
| 261 return urls | 274 return urls |
| 262 | 275 |
| 263 | 276 |
| 264 def _VerifyBenchmarkOutputDirectory(benchmark_setup_path, | 277 class _RunOutputVerifier(object): |
| 265 benchmark_output_directory_path): | 278 """Object to verify benchmark run from traces and WPR log stored in the |
| 266 """Verifies that all run inside the run_output_directory worked as expected. | 279 runner output directory. |
| 280 """ |
| 267 | 281 |
| 268 Args: | 282 def __init__(self, cache_validation_result, benchmark_setup): |
| 269 benchmark_setup_path: Path of the JSON of the benchmark setup. | 283 """Constructor. |
| 270 benchmark_output_directory_path: Path of the benchmark output directory to | |
| 271 verify. | |
| 272 """ | |
| 273 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
| 274 benchmark_setup = json.load(open(benchmark_setup_path)) | |
| 275 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
| 276 original_requests = set(benchmark_setup['url_resources']) | |
| 277 original_cached_requests = original_requests.intersection(cache_whitelist) | |
| 278 original_uncached_requests = original_requests.difference(cache_whitelist) | |
| 279 all_sent_url_requests = set() | |
| 280 | 284 |
| 281 # Verify requests from traces. | 285 Args: |
| 282 run_id = -1 | 286 cache_validation_result: JSON of the cache validation task. |
| 283 while True: | 287 benchmark_setup: JSON of the benchmark setup. |
| 284 run_id += 1 | 288 """ |
| 285 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | 289 self._cache_whitelist = set(benchmark_setup['cache_whitelist']) |
| 286 if not os.path.isdir(run_path): | 290 self._original_requests = set(cache_validation_result['effective_requests']) |
| 287 break | 291 self._original_post_requests = set( |
| 288 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | 292 cache_validation_result['effective_post_requests']) |
| 289 if not os.path.isfile(trace_path): | 293 self._original_cached_requests = self._original_requests.intersection( |
| 290 logging.error('missing trace %s' % trace_path) | 294 self._cache_whitelist) |
| 291 continue | 295 self._original_uncached_requests = self._original_requests.difference( |
| 292 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 296 self._cache_whitelist) |
| 293 logging.info('verifying %s from %s' % (trace.url, trace_path)) | 297 self._all_sent_url_requests = set() |
| 294 | 298 |
| 299 def VerifyTrace(self, trace): |
| 300 """Verifies a trace with the cache validation result and the benchmark |
| 301 setup. |
| 302 """ |
| 295 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 303 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
| 296 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 304 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
| 297 effective_cached_requests = \ | 305 effective_cached_requests = \ |
| 298 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) | 306 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) |
| 299 effective_uncached_requests = \ | 307 effective_uncached_requests = \ |
| 300 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) | 308 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) |
| 301 | 309 |
| 302 missing_requests = original_requests.difference(effective_requests) | 310 missing_requests = self._original_requests.difference(effective_requests) |
| 303 unexpected_requests = effective_requests.difference(original_requests) | 311 unexpected_requests = effective_requests.difference(self._original_requests) |
| 304 expected_cached_requests = \ | 312 expected_cached_requests = \ |
| 305 original_cached_requests.difference(missing_requests) | 313 self._original_cached_requests.difference(missing_requests) |
| 306 missing_cached_requests = \ | 314 expected_uncached_requests = self._original_uncached_requests.union( |
| 307 expected_cached_requests.difference(effective_cached_requests) | 315 unexpected_requests).difference(missing_requests) |
| 308 expected_uncached_requests = original_uncached_requests.union( | |
| 309 unexpected_requests).union(missing_cached_requests) | |
| 310 all_sent_url_requests.update(effective_uncached_requests) | |
| 311 | 316 |
| 312 # POST requests are known to be unable to use the cache. | 317 # POST requests are known to be unable to use the cache. |
| 313 expected_cached_requests.difference_update(effective_post_requests) | 318 expected_cached_requests.difference_update(effective_post_requests) |
| 314 expected_uncached_requests.update(effective_post_requests) | 319 expected_uncached_requests.update(effective_post_requests) |
| 315 | 320 |
| 316 _PrintUrlSetComparison(original_requests, effective_requests, | 321 _PrintUrlSetComparison(self._original_requests, effective_requests, |
| 317 'All resources') | 322 'All resources') |
| 318 _PrintUrlSetComparison(set(), effective_post_requests, | 323 _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources') |
| 319 'POST resources') | |
| 320 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, | 324 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, |
| 321 'Cached resources') | 325 'Cached resources') |
| 322 _PrintUrlSetComparison(expected_uncached_requests, | 326 _PrintUrlSetComparison(expected_uncached_requests, |
| 323 effective_uncached_requests, 'Non cached resources') | 327 effective_uncached_requests, 'Non cached resources') |
| 324 | 328 |
| 325 # Verify requests from WPR. | 329 self._all_sent_url_requests.update(effective_uncached_requests) |
| 326 wpr_log_path = os.path.join( | |
| 327 benchmark_output_directory_path, sandwich_runner.WPR_LOG_FILENAME) | |
| 328 logging.info('verifying requests from %s' % wpr_log_path) | |
| 329 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) | |
| 330 all_wpr_urls = set() | |
| 331 unserved_wpr_urls = set() | |
| 332 wpr_command_colliding_urls = set() | |
| 333 | 330 |
| 334 for request in all_wpr_requests: | 331 def VerifyWprLog(self, wpr_log_path): |
| 335 if request.is_wpr_host: | 332 """Verifies WPR log with previously verified traces.""" |
| 336 continue | 333 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) |
| 337 if urlparse(request.url).path.startswith('/web-page-replay'): | 334 all_wpr_urls = set() |
| 338 wpr_command_colliding_urls.add(request.url) | 335 unserved_wpr_urls = set() |
| 339 elif request.is_served is False: | 336 wpr_command_colliding_urls = set() |
| 340 unserved_wpr_urls.add(request.url) | |
| 341 all_wpr_urls.add(request.url) | |
| 342 | 337 |
| 343 _PrintUrlSetComparison(set(), unserved_wpr_urls, | 338 for request in all_wpr_requests: |
| 344 'Distinct unserved resources from WPR') | 339 if request.is_wpr_host: |
| 345 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, | 340 continue |
| 346 'Distinct resources colliding to WPR commands') | 341 if urlparse(request.url).path.startswith('/web-page-replay'): |
| 347 _PrintUrlSetComparison(all_wpr_urls, all_sent_url_requests, | 342 wpr_command_colliding_urls.add(request.url) |
| 348 'Distinct resource requests to WPR') | 343 elif request.is_served is False: |
| 344 unserved_wpr_urls.add(request.url) |
| 345 all_wpr_urls.add(request.url) |
| 349 | 346 |
| 350 | 347 _PrintUrlSetComparison(set(), unserved_wpr_urls, |
| 351 def _ReadSubresourceFromRunnerOutputDir(runner_output_dir): | 348 'Distinct unserved resources from WPR') |
| 352 """Extracts a list of subresources in runner output directory. | 349 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, |
| 353 | 350 'Distinct resources colliding to WPR commands') |
| 354 Args: | 351 _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests, |
| 355 runner_output_dir: Path of the runner's output directory. | 352 'Distinct resource requests to WPR') |
| 356 | |
| 357 Returns: | |
| 358 [URLs of sub-resources] | |
| 359 """ | |
| 360 trace_path = os.path.join( | |
| 361 runner_output_dir, '0', sandwich_runner.TRACE_FILENAME) | |
| 362 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
| 363 url_set = set() | |
| 364 for request_event in _FilterOutDataAndIncompleteRequests( | |
| 365 trace.request_track.GetEvents()): | |
| 366 url_set.add(request_event.url) | |
| 367 logging.info('lists %s resources of %s from %s' % \ | |
| 368 (len(url_set), trace.url, trace_path)) | |
| 369 return [url for url in url_set] | |
| 370 | 353 |
| 371 | 354 |
| 372 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): | 355 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): |
| 373 """Validates a cache archive content. | 356 """Validates a cache archive content. |
| 374 | 357 |
| 375 Args: | 358 Args: |
| 376 cache_build_trace_path: Path of the generated trace at the cache build time. | 359 cache_build_trace_path: Path of the generated trace at the cache build time. |
| 377 cache_archive_path: Cache archive's path to validate. | 360 cache_archive_path: Cache archive's path to validate. |
| 361 |
| 362 Returns: |
| 363 { |
| 364 'effective_requests': [URLs of all requests], |
| 365 'effective_post_requests': [URLs of POST requests], |
| 366 'expected_cached_resources': [URLs of resources expected to be cached], |
| 367 'successfully_cached_resources': [URLs of cached sub-resources] |
| 368 } |
| 378 """ | 369 """ |
| 379 # TODO(gabadie): What's the best way of propagating errors happening in here? | 370 # TODO(gabadie): What's the best way of propagating errors happening in here? |
| 380 logging.info('lists cached urls from %s' % cache_archive_path) | 371 logging.info('lists cached urls from %s' % cache_archive_path) |
| 381 with common_util.TemporaryDirectory() as cache_directory: | 372 with common_util.TemporaryDirectory() as cache_directory: |
| 382 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 373 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| 383 cache_keys = set( | 374 cache_keys = set( |
| 384 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) | 375 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) |
| 385 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 376 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
| 386 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 377 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
| 387 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 378 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
| (...skipping 10 matching lines...) Expand all Loading... |
| 398 expected_cached_requests = effective_requests.difference( | 389 expected_cached_requests = effective_requests.difference( |
| 399 effective_post_requests) | 390 effective_post_requests) |
| 400 effective_cache_keys = cache_keys.difference( | 391 effective_cache_keys = cache_keys.difference( |
| 401 upload_data_stream_cache_entry_keys) | 392 upload_data_stream_cache_entry_keys) |
| 402 | 393 |
| 403 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, | 394 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, |
| 404 'POST resources') | 395 'POST resources') |
| 405 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, | 396 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, |
| 406 'Cached resources') | 397 'Cached resources') |
| 407 | 398 |
| 399 return { |
| 400 'effective_requests': [url for url in effective_requests], |
| 401 'effective_post_requests': [url for url in effective_post_requests], |
| 402 'expected_cached_resources': [url for url in expected_cached_requests], |
| 403 'successfully_cached_resources': [url for url in effective_cache_keys] |
| 404 } |
| 405 |
| 406 |
| 407 def _ProcessRunOutputDir( |
| 408 cache_validation_result, benchmark_setup, runner_output_dir): |
| 409 """Process benchmark's run output directory. |
| 410 |
| 411 Args: |
| 412 cache_validation_result: Same as for _RunOutputVerifier |
| 413 benchmark_setup: Same as for _RunOutputVerifier |
| 414 runner_output_dir: Same as for SandwichRunner.output_dir |
| 415 |
| 416 Returns: |
| 417 List of per-repeat metrics dictionaries, sorted by repeat_id. |
| 418 """ |
| 419 run_metrics_list = [] |
| 420 run_output_verifier = _RunOutputVerifier( |
| 421 cache_validation_result, benchmark_setup) |
| 422 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( |
| 423 runner_output_dir): |
| 424 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) |
| 425 |
| 426 logging.info('loading trace: %s', trace_path) |
| 427 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
| 428 |
| 429 logging.info('verifying trace: %s', trace_path) |
| 430 run_output_verifier.VerifyTrace(trace) |
| 431 |
| 432 logging.info('extracting metrics from trace: %s', trace_path) |
| 433 run_metrics = { |
| 434 'url': trace.url, |
| 435 'repeat_id': repeat_id, |
| 436 'subresource_discoverer': benchmark_setup['subresource_discoverer'], |
| 437 'cache_recording.subresource_count': |
| 438 len(cache_validation_result['effective_requests']), |
| 439 'cache_recording.cached_subresource_count_theoretic': |
| 440 len(cache_validation_result['successfully_cached_resources']), |
| 441 'cache_recording.cached_subresource_count': |
| 442 len(cache_validation_result['expected_cached_resources']), |
| 443 'benchmark.subresource_count': len(_ListUrlRequests( |
| 444 trace, _RequestOutcome.All)), |
| 445 'benchmark.served_from_cache_count_theoretic': |
| 446 len(benchmark_setup['cache_whitelist']), |
| 447 'benchmark.served_from_cache_count': len(_ListUrlRequests( |
| 448 trace, _RequestOutcome.ServedFromCache)), |
| 449 } |
| 450 run_metrics.update( |
| 451 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( |
| 452 repeat_dir, trace)) |
| 453 run_metrics_list.append(run_metrics) |
| 454 run_metrics_list.sort(key=lambda e: e['repeat_id']) |
| 455 |
| 456 wpr_log_path = os.path.join( |
| 457 runner_output_dir, sandwich_runner.WPR_LOG_FILENAME) |
| 458 logging.info('verifying wpr log: %s', wpr_log_path) |
| 459 run_output_verifier.VerifyWprLog(wpr_log_path) |
| 460 return run_metrics_list |
| 461 |
| 408 | 462 |
| 409 class PrefetchBenchmarkBuilder(task_manager.Builder): | 463 class PrefetchBenchmarkBuilder(task_manager.Builder): |
| 410 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" | 464 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" |
| 411 | 465 |
| 412 def __init__(self, common_builder): | 466 def __init__(self, common_builder): |
| 413 task_manager.Builder.__init__(self, | 467 task_manager.Builder.__init__(self, |
| 414 common_builder.output_directory, | 468 common_builder.output_directory, |
| 415 common_builder.output_subdirectory) | 469 common_builder.output_subdirectory) |
| 416 self._common_builder = common_builder | 470 self._common_builder = common_builder |
| 417 | 471 |
| 418 self._patched_wpr_task = None | 472 self._wpr_archive_path = None |
| 419 self._reference_cache_task = None | 473 self._cache_path = None |
| 420 self._trace_from_grabbing_reference_cache = None | 474 self._trace_from_grabbing_reference_cache = None |
| 421 self._subresources_for_urls_task = None | 475 self._cache_validation_task = None |
| 422 self._PopulateCommonPipelines() | 476 self._PopulateCommonPipelines() |
| 423 | 477 |
| 424 def _PopulateCommonPipelines(self): | 478 def _PopulateCommonPipelines(self): |
| 425 """Creates necessary tasks to produce initial cache archive. | 479 """Creates necessary tasks to produce initial cache archive. |
| 426 | 480 |
| 427 Also creates a task for producing a json file with a mapping of URLs to | 481 Also creates a task validating the patched cache archive, whose result is |
| 428 subresources (urls-resources.json). | 482 saved as a json file (common/patched-cache-validation.json). |
| 429 | 483 |
| 430 Here is the full dependency tree for the returned task: | 484 Here is the full dependency tree for the returned task: |
| 431 common/patched-cache-validation.log | 485 common/patched-cache-validation.json |
| 432 depends on: common/patched-cache.zip | 486 depends on: common/patched-cache.zip |
| 433 depends on: common/original-cache.zip | 487 depends on: common/original-cache.zip |
| 434 depends on: common/webpages-patched.wpr | 488 depends on: common/webpages-patched.wpr |
| 435 depends on: common/webpages.wpr | 489 depends on: common/webpages.wpr |
| 436 depends on: common/urls-resources.json | |
| 437 depends on: common/original-cache.zip | |
| 438 """ | 490 """ |
| 439 @self.RegisterTask('common/webpages-patched.wpr', | 491 @self.RegisterTask('common/webpages-patched.wpr', |
| 440 dependencies=[self._common_builder.original_wpr_task]) | 492 dependencies=[self._common_builder.original_wpr_task]) |
| 441 def BuildPatchedWpr(): | 493 def BuildPatchedWpr(): |
| 442 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 494 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
| 443 shutil.copyfile( | 495 shutil.copyfile( |
| 444 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 496 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
| 445 _PatchWpr(BuildPatchedWpr.path) | 497 _PatchWpr(BuildPatchedWpr.path) |
| 446 | 498 |
| 447 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 499 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
| 448 def BuildOriginalCache(): | 500 def BuildOriginalCache(): |
| 449 runner = self._common_builder.CreateSandwichRunner() | 501 runner = self._common_builder.CreateSandwichRunner() |
| 450 runner.wpr_archive_path = BuildPatchedWpr.path | 502 runner.wpr_archive_path = BuildPatchedWpr.path |
| 451 runner.cache_archive_path = BuildOriginalCache.path | 503 runner.cache_archive_path = BuildOriginalCache.path |
| 452 runner.cache_operation = sandwich_runner.CacheOperation.SAVE | 504 runner.cache_operation = sandwich_runner.CacheOperation.SAVE |
| 453 runner.output_dir = BuildOriginalCache.run_path | 505 runner.output_dir = BuildOriginalCache.run_path |
| 454 runner.Run() | 506 runner.Run() |
| 455 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' | 507 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' |
| 456 original_cache_trace_path = os.path.join( | 508 original_cache_trace_path = os.path.join( |
| 457 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) | 509 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) |
| 458 | 510 |
| 459 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) | 511 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) |
| 460 def BuildPatchedCache(): | 512 def BuildPatchedCache(): |
| 461 _PatchCacheArchive(BuildOriginalCache.path, | 513 _PatchCacheArchive(BuildOriginalCache.path, |
| 462 original_cache_trace_path, BuildPatchedCache.path) | 514 original_cache_trace_path, BuildPatchedCache.path) |
| 463 | 515 |
| 464 @self.RegisterTask('common/subresources-for-urls.json', | 516 @self.RegisterTask('common/patched-cache-validation.json', |
| 465 [BuildOriginalCache]) | |
| 466 def ListUrlsResources(): | |
| 467 url_resources = _ReadSubresourceFromRunnerOutputDir( | |
| 468 BuildOriginalCache.run_path) | |
| 469 with open(ListUrlsResources.path, 'w') as output: | |
| 470 json.dump(url_resources, output) | |
| 471 | |
| 472 @self.RegisterTask('common/patched-cache-validation.log', | |
| 473 [BuildPatchedCache]) | 517 [BuildPatchedCache]) |
| 474 def ValidatePatchedCache(): | 518 def ValidatePatchedCache(): |
| 475 handler = logging.FileHandler(ValidatePatchedCache.path) | 519 cache_validation_result = _ValidateCacheArchiveContent( |
| 476 logging.getLogger().addHandler(handler) | 520 original_cache_trace_path, BuildPatchedCache.path) |
| 477 try: | 521 with open(ValidatePatchedCache.path, 'w') as output: |
| 478 _ValidateCacheArchiveContent( | 522 json.dump(cache_validation_result, output) |
| 479 original_cache_trace_path, BuildPatchedCache.path) | |
| 480 finally: | |
| 481 logging.getLogger().removeHandler(handler) | |
| 482 | 523 |
| 483 self._patched_wpr_task = BuildPatchedWpr | 524 self._wpr_archive_path = BuildPatchedWpr.path |
| 484 self._trace_from_grabbing_reference_cache = original_cache_trace_path | 525 self._trace_from_grabbing_reference_cache = original_cache_trace_path |
| 485 self._reference_cache_task = BuildPatchedCache | 526 self._cache_path = BuildPatchedCache.path |
| 486 self._subresources_for_urls_task = ListUrlsResources | 527 self._cache_validation_task = ValidatePatchedCache |
| 487 | 528 |
| 488 self._common_builder.default_final_tasks.append(ValidatePatchedCache) | 529 self._common_builder.default_final_tasks.append(ValidatePatchedCache) |
| 489 | 530 |
| 490 def PopulateLoadBenchmark(self, subresource_discoverer, | 531 def PopulateLoadBenchmark(self, subresource_discoverer, |
| 491 transformer_list_name, transformer_list): | 532 transformer_list_name, transformer_list): |
| 492 """Populate benchmarking tasks from its setup tasks. | 533 """Populate benchmarking tasks from its setup tasks. |
| 493 | 534 |
| 494 Args: | 535 Args: |
| 495 subresource_discoverer: Name of a subresources discoverer. | 536 subresource_discoverer: Name of a subresources discoverer. |
| 496 transformer_list_name: A string describing the transformers, will be used | 537 transformer_list_name: A string describing the transformers, will be used |
| 497 in Task names (prefer names without spaces and special characters). | 538 in Task names (prefer names without spaces and special characters). |
| 498 transformer_list: An ordered list of function that takes an instance of | 539 transformer_list: An ordered list of function that takes an instance of |
| 499 SandwichRunner as parameter, would be applied immediately before | 540 SandwichRunner as parameter, would be applied immediately before |
| 500 SandwichRunner.Run() in the given order. | 541 SandwichRunner.Run() in the given order. |
| 501 | 542 |
| 502 Here is the full dependency of the added tree for the returned task: | 543 Here is the full dependency of the added tree for the returned task: |
| 503 <transformer_list_name>/<subresource_discoverer>-metrics.csv | 544 <transformer_list_name>/<subresource_discoverer>-metrics.csv |
| 504 depends on: <transformer_list_name>/<subresource_discoverer>-run/ | 545 depends on: <transformer_list_name>/<subresource_discoverer>-run/ |
| 505 depends on: common/<subresource_discoverer>-cache.zip | 546 depends on: common/<subresource_discoverer>-cache.zip |
| 506 depends on: some tasks saved by PopulateCommonPipelines() | |
| 507 depends on: common/<subresource_discoverer>-setup.json | 547 depends on: common/<subresource_discoverer>-setup.json |
| 508 depends on: some tasks saved by PopulateCommonPipelines() | 548 depends on: common/patched-cache-validation.json |
| 509 """ | 549 """ |
| 510 additional_column_names = [ | 550 additional_column_names = [ |
| 511 'url', | 551 'url', |
| 512 'repeat_id', | 552 'repeat_id', |
| 513 'subresource_discoverer', | 553 'subresource_discoverer', |
| 514 'subresource_count', | 554 'cache_recording.subresource_count', |
| 515 # The amount of subresources detected at SetupBenchmark step. | 555 'cache_recording.cached_subresource_count_theoretic', |
| 516 'subresource_count_theoretic', | 556 'cache_recording.cached_subresource_count', |
| 517 # Amount of subresources for caching as suggested by the subresource | 557 'benchmark.subresource_count', |
| 518 # discoverer. | 558 'benchmark.served_from_cache_count_theoretic', |
| 519 'cached_subresource_count_theoretic', | 559 'benchmark.served_from_cache_count'] |
| 520 'cached_subresource_count'] | |
| 521 | 560 |
| 522 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS | 561 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS |
| 523 assert 'common' not in SUBRESOURCE_DISCOVERERS | 562 assert 'common' not in SUBRESOURCE_DISCOVERERS |
| 524 shared_task_prefix = os.path.join('common', subresource_discoverer) | 563 shared_task_prefix = os.path.join('common', subresource_discoverer) |
| 525 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) | 564 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) |
| 526 | 565 |
| 527 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, | 566 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, |
| 528 dependencies=[self._subresources_for_urls_task]) | 567 dependencies=[self._cache_validation_task]) |
| 529 def SetupBenchmark(): | 568 def SetupBenchmark(): |
| 530 whitelisted_urls = _ExtractDiscoverableUrls( | 569 whitelisted_urls = _ExtractDiscoverableUrls( |
| 531 self._trace_from_grabbing_reference_cache, subresource_discoverer) | 570 self._trace_from_grabbing_reference_cache, subresource_discoverer) |
| 532 | 571 |
| 533 url_resources = json.load(open(self._subresources_for_urls_task.path)) | |
| 534 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) | 572 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) |
| 535 with open(SetupBenchmark.path, 'w') as output: | 573 with open(SetupBenchmark.path, 'w') as output: |
| 536 json.dump({ | 574 json.dump({ |
| 537 'cache_whitelist': [url for url in whitelisted_urls], | 575 'cache_whitelist': [url for url in whitelisted_urls], |
| 538 'subresource_discoverer': subresource_discoverer, | 576 'subresource_discoverer': subresource_discoverer, |
| 539 'url_resources': url_resources, | |
| 540 }, output) | 577 }, output) |
| 541 | 578 |
| 542 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, | 579 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, |
| 543 dependencies=[ | 580 dependencies=[SetupBenchmark]) |
| 544 SetupBenchmark, self._reference_cache_task]) | |
| 545 def BuildBenchmarkCacheArchive(): | 581 def BuildBenchmarkCacheArchive(): |
| 546 setup = json.load(open(SetupBenchmark.path)) | 582 benchmark_setup = json.load(open(SetupBenchmark.path)) |
| 547 chrome_cache.ApplyUrlWhitelistToCacheArchive( | 583 chrome_cache.ApplyUrlWhitelistToCacheArchive( |
| 548 cache_archive_path=self._reference_cache_task.path, | 584 cache_archive_path=self._cache_path, |
| 549 whitelisted_urls=setup['cache_whitelist'], | 585 whitelisted_urls=benchmark_setup['cache_whitelist'], |
| 550 output_cache_archive_path=BuildBenchmarkCacheArchive.path) | 586 output_cache_archive_path=BuildBenchmarkCacheArchive.path) |
| 551 | 587 |
| 552 @self.RegisterTask(task_prefix + '-run/', | 588 @self.RegisterTask(task_prefix + '-run/', |
| 553 dependencies=[BuildBenchmarkCacheArchive]) | 589 dependencies=[BuildBenchmarkCacheArchive]) |
| 554 def RunBenchmark(): | 590 def RunBenchmark(): |
| 555 runner = self._common_builder.CreateSandwichRunner() | 591 runner = self._common_builder.CreateSandwichRunner() |
| 556 for transformer in transformer_list: | 592 for transformer in transformer_list: |
| 557 transformer(runner) | 593 transformer(runner) |
| 558 runner.wpr_archive_path = self._patched_wpr_task.path | 594 runner.wpr_archive_path = self._wpr_archive_path |
| 559 runner.wpr_out_log_path = os.path.join( | 595 runner.wpr_out_log_path = os.path.join( |
| 560 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) | 596 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) |
| 561 runner.cache_archive_path = BuildBenchmarkCacheArchive.path | 597 runner.cache_archive_path = BuildBenchmarkCacheArchive.path |
| 562 runner.cache_operation = sandwich_runner.CacheOperation.PUSH | 598 runner.cache_operation = sandwich_runner.CacheOperation.PUSH |
| 563 runner.output_dir = RunBenchmark.path | 599 runner.output_dir = RunBenchmark.path |
| 564 runner.Run() | 600 runner.Run() |
| 565 | 601 |
| 566 @self.RegisterTask(task_prefix + '-metrics.csv', | 602 @self.RegisterTask(task_prefix + '-metrics.csv', |
| 567 dependencies=[RunBenchmark]) | 603 dependencies=[RunBenchmark]) |
| 568 def ExtractMetrics(): | 604 def ProcessRunOutputDir(): |
| 569 # TODO(gabadie): Performance improvement: load each trace only once and | 605 benchmark_setup = json.load(open(SetupBenchmark.path)) |
| 570 # use it for validation and extraction of metrics later. | 606 cache_validation_result = json.load( |
| 571 _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path) | 607 open(self._cache_validation_task.path)) |
| 572 | 608 |
| 573 benchmark_setup = json.load(open(SetupBenchmark.path)) | 609 run_metrics_list = _ProcessRunOutputDir( |
| 574 run_metrics_list = [] | 610 cache_validation_result, benchmark_setup, RunBenchmark.path) |
| 575 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | 611 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
| 576 RunBenchmark.path): | |
| 577 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
| 578 logging.info('processing trace: %s', trace_path) | |
| 579 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
| 580 run_metrics = { | |
| 581 'url': trace.url, | |
| 582 'repeat_id': repeat_id, | |
| 583 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
| 584 'subresource_count': len(_ListUrlRequests( | |
| 585 trace, _RequestOutcome.All)), | |
| 586 'subresource_count_theoretic': | |
| 587 len(benchmark_setup['url_resources']), | |
| 588 'cached_subresource_count': len(_ListUrlRequests( | |
| 589 trace, _RequestOutcome.ServedFromCache)), | |
| 590 'cached_subresource_count_theoretic': | |
| 591 len(benchmark_setup['cache_whitelist']), | |
| 592 } | |
| 593 run_metrics.update( | |
| 594 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
| 595 repeat_dir, trace)) | |
| 596 run_metrics_list.append(run_metrics) | |
| 597 | |
| 598 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
| 599 with open(ExtractMetrics.path, 'w') as csv_file: | |
| 600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 612 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
| 601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 613 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
| 602 writer.writeheader() | 614 writer.writeheader() |
| 603 for trace_metrics in run_metrics_list: | 615 for trace_metrics in run_metrics_list: |
| 604 writer.writerow(trace_metrics) | 616 writer.writerow(trace_metrics) |
| 605 | 617 |
| 606 self._common_builder.default_final_tasks.append(ExtractMetrics) | 618 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
| OLD | NEW |