Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_prefetch.py

Issue 2033093002: sandwich: Merge cache-validation.json and urls-for-resources.json tasks (Closed)
Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase (created 4 years, 6 months ago)
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+"""
+Implements a task builder for benchmarking effects of NoState Prefetch.
+Noticeable steps of the task pipeline:
+  * Save a WPR archive
+  * Process the WPR archive to make all resources cacheable
+  * Process cache archive to patch response headers back to their original
+    values.
+  * Find out which resources are discoverable by NoState Prefetch
+    (HTMLPreloadScanner)
+  * Load pages with empty/full/prefetched cache
+  * Extract most important metrics to a CSV
+"""
+
 import csv
 import logging
 import json
 import os
 import re
 import shutil
 from urlparse import urlparse
 
 import chrome_cache
 import common_util
(...skipping 176 matching lines...)
   elif subresource_discoverer == PARSER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests(
         first_resource_request, dependencies_lens)
   elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.PreloadedRequests(
         first_resource_request, dependencies_lens, trace)
   else:
     assert False
 
   whitelisted_urls = set()
-  logging.info('white-listing %s' % first_resource_request.url)
   for request in _FilterOutDataAndIncompleteRequests(discovered_requests):
-    logging.info('white-listing %s' % request.url)
+    logging.debug('white-listing %s', request.url)
     whitelisted_urls.add(request.url)
+  logging.info('number of white-listed resources: %d', len(whitelisted_urls))
   return whitelisted_urls
 
 
 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
   """Compare URL sets and log the diffs.
 
   Args:
     ref_url_set: Set of reference urls.
     url_set: Set of urls to compare to the reference.
     url_set_name: The set name for logging purposes.
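Note on the logging change in this hunk: the new call passes the URL as a lazy argument (logging.debug('white-listing %s', request.url)) rather than pre-formatting with the % operator. The logging module only performs the interpolation if the record is actually emitted, so the per-request message becomes essentially free in normal INFO-level runs. A minimal self-contained illustration (toy URL, not code from this CL):

    import logging

    logging.basicConfig(level=logging.INFO)
    url = 'https://example.com/app.js'
    # Eager: builds the message string even though DEBUG records are dropped.
    logging.debug('white-listing %s' % url)
    # Lazy: interpolation is deferred and skipped when DEBUG is filtered out.
    logging.debug('white-listing %s', url)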
(...skipping 39 matching lines...)
         request_event.method.upper().strip() == 'POST'):
       urls.add(request_event.url)
     elif (request_kind == _RequestOutcome.NotServedFromCache and
         not request_event.from_disk_cache):
       urls.add(request_event.url)
     elif request_kind == _RequestOutcome.All:
       urls.add(request_event.url)
   return urls
 
 
-def _VerifyBenchmarkOutputDirectory(benchmark_setup_path,
-                                    benchmark_output_directory_path):
-  """Verifies that all run inside the run_output_directory worked as expected.
+class _RunOutputVerifier(object):
+  """Object to verify benchmark run from traces and WPR log stored in the
+  runner output directory.
+  """
 
-  Args:
-    benchmark_setup_path: Path of the JSON of the benchmark setup.
-    benchmark_output_directory_path: Path of the benchmark output directory to
-        verify.
-  """
-  # TODO(gabadie): What's the best way of propagating errors happening in here?
-  benchmark_setup = json.load(open(benchmark_setup_path))
-  cache_whitelist = set(benchmark_setup['cache_whitelist'])
-  original_requests = set(benchmark_setup['url_resources'])
-  original_cached_requests = original_requests.intersection(cache_whitelist)
-  original_uncached_requests = original_requests.difference(cache_whitelist)
-  all_sent_url_requests = set()
+  def __init__(self, cache_validation_result, benchmark_setup):
+    """Constructor.
 
-  # Verify requests from traces.
-  run_id = -1
-  while True:
-    run_id += 1
-    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
-    if not os.path.isdir(run_path):
-      break
-    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
-    if not os.path.isfile(trace_path):
-      logging.error('missing trace %s' % trace_path)
-      continue
-    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
-    logging.info('verifying %s from %s' % (trace.url, trace_path))
+    Args:
+      cache_validation_result: JSON of the cache validation task.
+      benchmark_setup: JSON of the benchmark setup.
+    """
+    self._cache_whitelist = set(benchmark_setup['cache_whitelist'])
+    self._original_requests = set(cache_validation_result['effective_requests'])
+    self._original_post_requests = set(
+        cache_validation_result['effective_post_requests'])
+    self._original_cached_requests = self._original_requests.intersection(
+        self._cache_whitelist)
+    self._original_uncached_requests = self._original_requests.difference(
+        self._cache_whitelist)
+    self._all_sent_url_requests = set()
 
+  def VerifyTrace(self, trace):
+    """Verifies a trace with the cache validation result and the benchmark
+    setup.
+    """
     effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
     effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
     effective_cached_requests = \
         _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
     effective_uncached_requests = \
         _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
 
-    missing_requests = original_requests.difference(effective_requests)
-    unexpected_requests = effective_requests.difference(original_requests)
+    missing_requests = self._original_requests.difference(effective_requests)
+    unexpected_requests = effective_requests.difference(self._original_requests)
     expected_cached_requests = \
-        original_cached_requests.difference(missing_requests)
-    missing_cached_requests = \
-        expected_cached_requests.difference(effective_cached_requests)
-    expected_uncached_requests = original_uncached_requests.union(
-        unexpected_requests).union(missing_cached_requests)
-    all_sent_url_requests.update(effective_uncached_requests)
+        self._original_cached_requests.difference(missing_requests)
+    expected_uncached_requests = self._original_uncached_requests.union(
+        unexpected_requests).difference(missing_requests)
 
     # POST requests are known to be unable to use the cache.
     expected_cached_requests.difference_update(effective_post_requests)
     expected_uncached_requests.update(effective_post_requests)
 
-    _PrintUrlSetComparison(original_requests, effective_requests,
+    _PrintUrlSetComparison(self._original_requests, effective_requests,
                            'All resources')
-    _PrintUrlSetComparison(set(), effective_post_requests,
-                           'POST resources')
+    _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources')
     _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests,
                            'Cached resources')
     _PrintUrlSetComparison(expected_uncached_requests,
                            effective_uncached_requests, 'Non cached resources')
 
-  # Verify requests from WPR.
-  wpr_log_path = os.path.join(
-      benchmark_output_directory_path, sandwich_runner.WPR_LOG_FILENAME)
-  logging.info('verifying requests from %s' % wpr_log_path)
-  all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path)
-  all_wpr_urls = set()
-  unserved_wpr_urls = set()
-  wpr_command_colliding_urls = set()
+    self._all_sent_url_requests.update(effective_uncached_requests)
 
-  for request in all_wpr_requests:
-    if request.is_wpr_host:
-      continue
-    if urlparse(request.url).path.startswith('/web-page-replay'):
-      wpr_command_colliding_urls.add(request.url)
-    elif request.is_served is False:
-      unserved_wpr_urls.add(request.url)
-    all_wpr_urls.add(request.url)
+  def VerifyWprLog(self, wpr_log_path):
+    """Verifies WPR log with previously verified traces."""
+    all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path)
+    all_wpr_urls = set()
+    unserved_wpr_urls = set()
+    wpr_command_colliding_urls = set()
 
-  _PrintUrlSetComparison(set(), unserved_wpr_urls,
-                         'Distinct unserved resources from WPR')
-  _PrintUrlSetComparison(set(), wpr_command_colliding_urls,
-                         'Distinct resources colliding to WPR commands')
-  _PrintUrlSetComparison(all_wpr_urls, all_sent_url_requests,
-                         'Distinct resource requests to WPR')
+    for request in all_wpr_requests:
+      if request.is_wpr_host:
+        continue
+      if urlparse(request.url).path.startswith('/web-page-replay'):
+        wpr_command_colliding_urls.add(request.url)
+      elif request.is_served is False:
+        unserved_wpr_urls.add(request.url)
+      all_wpr_urls.add(request.url)
 
+    _PrintUrlSetComparison(set(), unserved_wpr_urls,
+                           'Distinct unserved resources from WPR')
+    _PrintUrlSetComparison(set(), wpr_command_colliding_urls,
+                           'Distinct resources colliding to WPR commands')
+    _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests,
+                           'Distinct resource requests to WPR')
 
-
-def _ReadSubresourceFromRunnerOutputDir(runner_output_dir):
-  """Extracts a list of subresources in runner output directory.
-
-  Args:
-    runner_output_dir: Path of the runner's output directory.
-
-  Returns:
-    [URLs of sub-resources]
-  """
-  trace_path = os.path.join(
-      runner_output_dir, '0', sandwich_runner.TRACE_FILENAME)
-  trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
-  url_set = set()
-  for request_event in _FilterOutDataAndIncompleteRequests(
-      trace.request_track.GetEvents()):
-    url_set.add(request_event.url)
-  logging.info('lists %s resources of %s from %s' % \
-      (len(url_set), trace.url, trace_path))
-  return [url for url in url_set]
 
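The set arithmetic in VerifyTrace is easier to follow on toy data. A minimal sketch that mirrors the expressions above with made-up URLs ('a'/'b'/'c'/'d' are stand-ins for request URLs; this is illustrative, not code from the CL):

    # Known from the cache-build recording.
    original_requests = {'a', 'b', 'c'}
    cache_whitelist = {'a', 'b'}
    # Observed in one benchmark run.
    effective_requests = {'a', 'c', 'd'}

    missing_requests = original_requests - effective_requests     # {'b'}
    unexpected_requests = effective_requests - original_requests  # {'d'}
    original_cached = original_requests & cache_whitelist          # {'a', 'b'}
    original_uncached = original_requests - cache_whitelist        # {'c'}

    # What the verifier expects this run to serve from cache vs. network.
    expected_cached = original_cached - missing_requests           # {'a'}
    expected_uncached = ((original_uncached | unexpected_requests)
                         - missing_requests)                       # {'c', 'd'}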
 
 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
   """Validates the content of a cache archive.
 
   Args:
     cache_build_trace_path: Path of the generated trace at the cache build time.
     cache_archive_path: Cache archive's path to validate.
+
+  Returns:
+    {
+      'effective_requests': [URLs of all requests],
+      'effective_post_requests': [URLs of POST requests],
+      'expected_cached_resources': [URLs of resources expected to be cached],
+      'successfully_cached_resources': [URLs of cached sub-resources]
+    }
   """
   # TODO(gabadie): What's the best way of propagating errors happening in here?
   logging.info('lists cached urls from %s' % cache_archive_path)
   with common_util.TemporaryDirectory() as cache_directory:
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
     cache_keys = set(
         chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
     trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
     effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
     effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
(...skipping 10 matching lines...)
     expected_cached_requests = effective_requests.difference(
         effective_post_requests)
     effective_cache_keys = cache_keys.difference(
         upload_data_stream_cache_entry_keys)
 
     _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests,
                            'POST resources')
     _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
                            'Cached resources')
 
+    return {
+      'effective_requests': [url for url in effective_requests],
+      'effective_post_requests': [url for url in effective_post_requests],
+      'expected_cached_resources': [url for url in expected_cached_requests],
+      'successfully_cached_resources': [url for url in effective_cache_keys]
+    }
+
+
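For concreteness, the dictionary returned here (and serialized to common/patched-cache-validation.json by the ValidatePatchedCache task below) would look something like the following; the URLs are invented for illustration:

    {
      "effective_requests": ["https://example.com/",
                             "https://example.com/app.js",
                             "https://example.com/track"],
      "effective_post_requests": ["https://example.com/track"],
      "expected_cached_resources": ["https://example.com/",
                                    "https://example.com/app.js"],
      "successfully_cached_resources": ["https://example.com/app.js"]
    }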
+def _ProcessRunOutputDir(
+    cache_validation_result, benchmark_setup, runner_output_dir):
+  """Processes a benchmark's run output directory.
+
+  Args:
+    cache_validation_result: Same as for _RunOutputVerifier.
+    benchmark_setup: Same as for _RunOutputVerifier.
+    runner_output_dir: Same as for SandwichRunner.output_dir.
+
+  Returns:
+    List of dictionaries, one per repeated run.
+  """
+  run_metrics_list = []
+  run_output_verifier = _RunOutputVerifier(
+      cache_validation_result, benchmark_setup)
+  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
+      runner_output_dir):
+    trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
+
+    logging.info('loading trace: %s', trace_path)
+    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
+
+    logging.info('verifying trace: %s', trace_path)
+    run_output_verifier.VerifyTrace(trace)
+
+    logging.info('extracting metrics from trace: %s', trace_path)
+    run_metrics = {
+        'url': trace.url,
+        'repeat_id': repeat_id,
+        'subresource_discoverer': benchmark_setup['subresource_discoverer'],
+        'cache_recording.subresource_count':
+            len(cache_validation_result['effective_requests']),
+        'cache_recording.cached_subresource_count_theoretic':
+            len(cache_validation_result['successfully_cached_resources']),
+        'cache_recording.cached_subresource_count':
+            len(cache_validation_result['expected_cached_resources']),
+        'benchmark.subresource_count': len(_ListUrlRequests(
+            trace, _RequestOutcome.All)),
+        'benchmark.served_from_cache_count_theoretic':
+            len(benchmark_setup['cache_whitelist']),
+        'benchmark.served_from_cache_count': len(_ListUrlRequests(
+            trace, _RequestOutcome.ServedFromCache)),
+    }
+    run_metrics.update(
+        sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
+            repeat_dir, trace))
+    run_metrics_list.append(run_metrics)
+  run_metrics_list.sort(key=lambda e: e['repeat_id'])
+
+  wpr_log_path = os.path.join(
+      runner_output_dir, sandwich_runner.WPR_LOG_FILENAME)
+  logging.info('verifying wpr log: %s', wpr_log_path)
+  run_output_verifier.VerifyWprLog(wpr_log_path)
+  return run_metrics_list
+
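Each run_metrics dictionary produced above becomes one CSV row downstream. The dotted names (cache_recording.* for the recording phase, benchmark.* for the measured runs) are ordinary column names as far as csv.DictWriter is concerned. A standalone sketch with toy values and only a subset of the real columns:

    import csv
    import sys

    rows = [{'url': 'https://example.com/', 'repeat_id': 0,
             'benchmark.subresource_count': 12,
             'benchmark.served_from_cache_count': 7}]
    writer = csv.DictWriter(sys.stdout, fieldnames=[
        'url', 'repeat_id', 'benchmark.subresource_count',
        'benchmark.served_from_cache_count'])
    writer.writeheader()
    for row in rows:
      writer.writerow(row)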
 
 
 class PrefetchBenchmarkBuilder(task_manager.Builder):
   """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""
 
   def __init__(self, common_builder):
     task_manager.Builder.__init__(self,
                                   common_builder.output_directory,
                                   common_builder.output_subdirectory)
     self._common_builder = common_builder
 
-    self._patched_wpr_task = None
-    self._reference_cache_task = None
+    self._wpr_archive_path = None
+    self._cache_path = None
     self._trace_from_grabbing_reference_cache = None
-    self._subresources_for_urls_task = None
+    self._cache_validation_task = None
     self._PopulateCommonPipelines()
 
   def _PopulateCommonPipelines(self):
     """Creates necessary tasks to produce initial cache archive.
 
     Also creates a task for producing a json file with a mapping of URLs to
     subresources (urls-resources.json).
 
     Here is the full dependency tree for the returned task:
-      common/patched-cache-validation.log
+      common/patched-cache-validation.json
         depends on: common/patched-cache.zip
           depends on: common/original-cache.zip
             depends on: common/webpages-patched.wpr
               depends on: common/webpages.wpr
-        depends on: common/urls-resources.json
-          depends on: common/original-cache.zip
     """
     @self.RegisterTask('common/webpages-patched.wpr',
                        dependencies=[self._common_builder.original_wpr_task])
     def BuildPatchedWpr():
       common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
       shutil.copyfile(
           self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
       _PatchWpr(BuildPatchedWpr.path)
 
     @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
     def BuildOriginalCache():
       runner = self._common_builder.CreateSandwichRunner()
       runner.wpr_archive_path = BuildPatchedWpr.path
       runner.cache_archive_path = BuildOriginalCache.path
       runner.cache_operation = sandwich_runner.CacheOperation.SAVE
       runner.output_dir = BuildOriginalCache.run_path
       runner.Run()
     BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
     original_cache_trace_path = os.path.join(
         BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
 
     @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
     def BuildPatchedCache():
       _PatchCacheArchive(BuildOriginalCache.path,
                          original_cache_trace_path, BuildPatchedCache.path)
 
-    @self.RegisterTask('common/subresources-for-urls.json',
-                       [BuildOriginalCache])
-    def ListUrlsResources():
-      url_resources = _ReadSubresourceFromRunnerOutputDir(
-          BuildOriginalCache.run_path)
-      with open(ListUrlsResources.path, 'w') as output:
-        json.dump(url_resources, output)
-
-    @self.RegisterTask('common/patched-cache-validation.log',
+    @self.RegisterTask('common/patched-cache-validation.json',
                        [BuildPatchedCache])
     def ValidatePatchedCache():
-      handler = logging.FileHandler(ValidatePatchedCache.path)
-      logging.getLogger().addHandler(handler)
-      try:
-        _ValidateCacheArchiveContent(
-            original_cache_trace_path, BuildPatchedCache.path)
-      finally:
-        logging.getLogger().removeHandler(handler)
+      cache_validation_result = _ValidateCacheArchiveContent(
+          original_cache_trace_path, BuildPatchedCache.path)
+      with open(ValidatePatchedCache.path, 'w') as output:
+        json.dump(cache_validation_result, output)
 
-    self._patched_wpr_task = BuildPatchedWpr
+    self._wpr_archive_path = BuildPatchedWpr.path
     self._trace_from_grabbing_reference_cache = original_cache_trace_path
-    self._reference_cache_task = BuildPatchedCache
-    self._subresources_for_urls_task = ListUrlsResources
+    self._cache_path = BuildPatchedCache.path
+    self._cache_validation_task = ValidatePatchedCache
 
     self._common_builder.default_final_tasks.append(ValidatePatchedCache)
 
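The RegisterTask decorator used throughout this builder comes from task_manager, which is specific to tools/android/loading. As a rough mental model only (an assumption about its general shape, not the real implementation), it registers the decorated function as a task keyed by its output path and wires up dependencies, along these lines:

    # Hypothetical sketch of the RegisterTask pattern; the real
    # task_manager.Builder is more involved (merging, scheduling, etc.).
    class Builder(object):
      def __init__(self):
        self._tasks = {}  # Maps output path -> (recipe, dependencies).

      def RegisterTask(self, path, dependencies=(), merge=False):
        def decorator(recipe):
          # Tasks expose their output location, which is why the code above
          # can read attributes such as BuildPatchedWpr.path.
          recipe.path = path
          self._tasks[path] = (recipe, list(dependencies))
          return recipe
        return decorator

This also matches the ordering constraint visible in the diff: ValidatePatchedCache only runs once BuildPatchedCache has produced common/patched-cache.zip.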
   def PopulateLoadBenchmark(self, subresource_discoverer,
                             transformer_list_name, transformer_list):
     """Populates benchmarking tasks from the common setup tasks.
 
     Args:
       subresource_discoverer: Name of a subresource discoverer.
       transformer_list_name: A string describing the transformers; it is used
           in Task names (prefer names without spaces and special characters).
       transformer_list: An ordered list of functions that take an instance of
           SandwichRunner as parameter; they are applied immediately before
           SandwichRunner.Run() in the given order.
 
     Here is the full dependency tree added for the returned task:
       <transformer_list_name>/<subresource_discoverer>-metrics.csv
         depends on: <transformer_list_name>/<subresource_discoverer>-run/
           depends on: common/<subresource_discoverer>-cache.zip
-            depends on: some tasks saved by PopulateCommonPipelines()
             depends on: common/<subresource_discoverer>-setup.json
-              depends on: some tasks saved by PopulateCommonPipelines()
+              depends on: common/patched-cache-validation.json
     """
     additional_column_names = [
         'url',
         'repeat_id',
         'subresource_discoverer',
-        'subresource_count',
-        # The amount of subresources detected at SetupBenchmark step.
-        'subresource_count_theoretic',
-        # Amount of subresources for caching as suggested by the subresource
-        # discoverer.
-        'cached_subresource_count_theoretic',
-        'cached_subresource_count']
+        'cache_recording.subresource_count',
+        'cache_recording.cached_subresource_count_theoretic',
+        'cache_recording.cached_subresource_count',
+        'benchmark.subresource_count',
+        'benchmark.served_from_cache_count_theoretic',
+        'benchmark.served_from_cache_count']
 
     assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
     assert 'common' not in SUBRESOURCE_DISCOVERERS
     shared_task_prefix = os.path.join('common', subresource_discoverer)
     task_prefix = os.path.join(transformer_list_name, subresource_discoverer)
 
     @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
-                       dependencies=[self._subresources_for_urls_task])
+                       dependencies=[self._cache_validation_task])
     def SetupBenchmark():
       whitelisted_urls = _ExtractDiscoverableUrls(
           self._trace_from_grabbing_reference_cache, subresource_discoverer)
 
-      url_resources = json.load(open(self._subresources_for_urls_task.path))
       common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
       with open(SetupBenchmark.path, 'w') as output:
         json.dump({
           'cache_whitelist': [url for url in whitelisted_urls],
           'subresource_discoverer': subresource_discoverer,
-          'url_resources': url_resources,
         }, output)
 
     @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
-                       dependencies=[
-                           SetupBenchmark, self._reference_cache_task])
+                       dependencies=[SetupBenchmark])
     def BuildBenchmarkCacheArchive():
-      setup = json.load(open(SetupBenchmark.path))
+      benchmark_setup = json.load(open(SetupBenchmark.path))
       chrome_cache.ApplyUrlWhitelistToCacheArchive(
-          cache_archive_path=self._reference_cache_task.path,
-          whitelisted_urls=setup['cache_whitelist'],
+          cache_archive_path=self._cache_path,
+          whitelisted_urls=benchmark_setup['cache_whitelist'],
           output_cache_archive_path=BuildBenchmarkCacheArchive.path)
 
     @self.RegisterTask(task_prefix + '-run/',
                        dependencies=[BuildBenchmarkCacheArchive])
     def RunBenchmark():
       runner = self._common_builder.CreateSandwichRunner()
       for transformer in transformer_list:
         transformer(runner)
-      runner.wpr_archive_path = self._patched_wpr_task.path
+      runner.wpr_archive_path = self._wpr_archive_path
       runner.wpr_out_log_path = os.path.join(
           RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
       runner.cache_archive_path = BuildBenchmarkCacheArchive.path
       runner.cache_operation = sandwich_runner.CacheOperation.PUSH
       runner.output_dir = RunBenchmark.path
       runner.Run()
 
     @self.RegisterTask(task_prefix + '-metrics.csv',
                        dependencies=[RunBenchmark])
-    def ExtractMetrics():
-      # TODO(gabadie): Performance improvement: load each trace only once and
-      # use it for validation and extraction of metrics later.
-      _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path)
-
-      benchmark_setup = json.load(open(SetupBenchmark.path))
-      run_metrics_list = []
-      for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
-          RunBenchmark.path):
-        trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
-        logging.info('processing trace: %s', trace_path)
-        trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
-        run_metrics = {
-            'url': trace.url,
-            'repeat_id': repeat_id,
-            'subresource_discoverer': benchmark_setup['subresource_discoverer'],
-            'subresource_count': len(_ListUrlRequests(
-                trace, _RequestOutcome.All)),
-            'subresource_count_theoretic':
-                len(benchmark_setup['url_resources']),
-            'cached_subresource_count': len(_ListUrlRequests(
-                trace, _RequestOutcome.ServedFromCache)),
-            'cached_subresource_count_theoretic':
-                len(benchmark_setup['cache_whitelist']),
-        }
-        run_metrics.update(
-            sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
-                repeat_dir, trace))
-        run_metrics_list.append(run_metrics)
-
-      run_metrics_list.sort(key=lambda e: e['repeat_id'])
-      with open(ExtractMetrics.path, 'w') as csv_file:
+    def ProcessRunOutputDir():
+      benchmark_setup = json.load(open(SetupBenchmark.path))
+      cache_validation_result = json.load(
+          open(self._cache_validation_task.path))
+
+      run_metrics_list = _ProcessRunOutputDir(
+          cache_validation_result, benchmark_setup, RunBenchmark.path)
+      with open(ProcessRunOutputDir.path, 'w') as csv_file:
         writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
             sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
         writer.writeheader()
         for trace_metrics in run_metrics_list:
           writer.writerow(trace_metrics)
 
-    self._common_builder.default_final_tasks.append(ExtractMetrics)
+    self._common_builder.default_final_tasks.append(ProcessRunOutputDir)
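The net effect of this CL is that two artifacts of the old pipeline (the free-form patched-cache-validation.log and the subresources-for-urls.json listing) are merged into the single machine-readable common/patched-cache-validation.json. A hypothetical downstream consumer, with key names taken from the diff but the reading code itself only illustrative:

    import json

    with open('common/patched-cache-validation.json') as f:
      cache_validation_result = json.load(f)

    # Replaces what subresources-for-urls.json used to list:
    url_resources = cache_validation_result['effective_requests']
    # Previously only recoverable by parsing the validation log:
    cached = set(cache_validation_result['successfully_cached_resources'])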