Chromium Code Reviews

Side by Side Diff: tools/android/loading/sandwich_prefetch.py

Issue 2033093002: sandwich: Merge cache-validation.json and urls-for-resources.json tasks (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Forgot PrefetchBenchmarkBuilder._PopulateCommonPipelines()'s docstring =D Created 4 years, 6 months ago
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import csv 5 import csv
pasko 2016/06/03 17:13:46 Let's add a top-level comment like: # Implements a
gabadie 2016/06/06 09:43:15 Done.
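The suggested top-level comment is truncated in this view; below is a sketch of what such a module comment could look like, inferred from the PrefetchBenchmarkBuilder docstring further down. The wording is an assumption, not the text that actually landed in the CL.

    # Sketch only (wording is an assumption inferred from docstrings in this file).
    #
    # Implements the prefetch "sandwich" benchmark: builds patched WPR and cache
    # archives, validates them, and replays page loads to emulate NoState-Prefetch
    # with different subresource discoverers, extracting per-run metrics to CSV.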
6 import logging 6 import logging
7 import json 7 import json
8 import os 8 import os
9 import re 9 import re
10 import shutil 10 import shutil
11 from urlparse import urlparse 11 from urlparse import urlparse
12 12
13 import chrome_cache 13 import chrome_cache
14 import common_util 14 import common_util
15 import loading_trace 15 import loading_trace
(...skipping 175 matching lines...)
191 elif subresource_discoverer == PARSER_DISCOVERER: 191 elif subresource_discoverer == PARSER_DISCOVERER:
192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( 192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests(
193 first_resource_request, dependencies_lens) 193 first_resource_request, dependencies_lens)
194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: 194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER:
195 discovered_requests = PrefetchSimulationView.PreloadedRequests( 195 discovered_requests = PrefetchSimulationView.PreloadedRequests(
196 first_resource_request, dependencies_lens, trace) 196 first_resource_request, dependencies_lens, trace)
197 else: 197 else:
198 assert False 198 assert False
199 199
200 whitelisted_urls = set() 200 whitelisted_urls = set()
201 logging.info('white-listing %s' % first_resource_request.url)
202 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): 201 for request in _FilterOutDataAndIncompleteRequests(discovered_requests):
203 logging.info('white-listing %s' % request.url) 202 logging.debug('white-listing %s', request.url)
204 whitelisted_urls.add(request.url) 203 whitelisted_urls.add(request.url)
204 logging.info('number of white-listed resources: %d', len(whitelisted_urls))
205 return whitelisted_urls 205 return whitelisted_urls
206 206
207 207
208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): 208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
209 """Compare URL sets and log the diffs. 209 """Compare URL sets and log the diffs.
210 210
211 Args: 211 Args:
212 ref_url_set: Set of reference urls. 212 ref_url_set: Set of reference urls.
213 url_set: Set of urls to compare to the reference. 213 url_set: Set of urls to compare to the reference.
214 url_set_name: The set name for logging purposes. 214 url_set_name: The set name for logging purposes.
(...skipping 39 matching lines...)
254 request_event.method.upper().strip() == 'POST'): 254 request_event.method.upper().strip() == 'POST'):
255 urls.add(request_event.url) 255 urls.add(request_event.url)
256 elif (request_kind == _RequestOutcome.NotServedFromCache and 256 elif (request_kind == _RequestOutcome.NotServedFromCache and
257 not request_event.from_disk_cache): 257 not request_event.from_disk_cache):
258 urls.add(request_event.url) 258 urls.add(request_event.url)
259 elif request_kind == _RequestOutcome.All: 259 elif request_kind == _RequestOutcome.All:
260 urls.add(request_event.url) 260 urls.add(request_event.url)
261 return urls 261 return urls
262 262
263 263
264 def _VerifyBenchmarkOutputDirectory(benchmark_setup_path, 264 class _RunOutputVerifier(object):
265 benchmark_output_directory_path): 265 """Object to verify benchmark run from traces and WPR log stored in the
266 """Verifies that all run inside the run_output_directory worked as expected. 266 runner output directory.
267 """
267 268
268 Args: 269 def __init__(self, cache_validation_result, benchmark_setup):
269 benchmark_setup_path: Path of the JSON of the benchmark setup. 270 """Constructor.
270 benchmark_output_directory_path: Path of the benchmark output directory to
271 verify.
272 """
273 # TODO(gabadie): What's the best way of propagating errors happening in here?
274 benchmark_setup = json.load(open(benchmark_setup_path))
275 cache_whitelist = set(benchmark_setup['cache_whitelist'])
276 original_requests = set(benchmark_setup['url_resources'])
277 original_cached_requests = original_requests.intersection(cache_whitelist)
278 original_uncached_requests = original_requests.difference(cache_whitelist)
279 all_sent_url_requests = set()
280 271
281 # Verify requests from traces. 272 Args:
282 run_id = -1 273 cache_validation_result: JSON of the cache validation task.
283 while True: 274 benchmark_setup: JSON of the benchmark setup.
284 run_id += 1 275 """
285 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) 276 self._cache_whitelist = set(benchmark_setup['cache_whitelist'])
286 if not os.path.isdir(run_path): 277 self._original_requests = set(cache_validation_result['effective_requests'])
287 break 278 self._original_post_requests = set(
288 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) 279 cache_validation_result['effective_post_requests'])
289 if not os.path.isfile(trace_path): 280 self._original_cached_requests = self._original_requests.intersection(
290 logging.error('missing trace %s' % trace_path) 281 self._cache_whitelist)
291 continue 282 self._original_uncached_requests = self._original_requests.difference(
292 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) 283 self._cache_whitelist)
293 logging.info('verifying %s from %s' % (trace.url, trace_path)) 284 self._all_sent_url_requests = set()
294 285
286 def VerifyTrace(self, trace):
287 """Verifies a trace with the cache validation result and the benchmark
288 setup.
289 """
295 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) 290 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
296 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) 291 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
297 effective_cached_requests = \ 292 effective_cached_requests = \
298 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) 293 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
299 effective_uncached_requests = \ 294 effective_uncached_requests = \
300 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) 295 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
301 296
302 missing_requests = original_requests.difference(effective_requests) 297 missing_requests = self._original_requests.difference(effective_requests)
303 unexpected_requests = effective_requests.difference(original_requests) 298 unexpected_requests = effective_requests.difference(self._original_requests)
304 expected_cached_requests = \ 299 expected_cached_requests = \
305 original_cached_requests.difference(missing_requests) 300 self._original_cached_requests.difference(missing_requests)
306 missing_cached_requests = \ 301 expected_uncached_requests = self._original_uncached_requests.union(
307 expected_cached_requests.difference(effective_cached_requests) 302 unexpected_requests).difference(missing_requests)
308 expected_uncached_requests = original_uncached_requests.union(
309 unexpected_requests).union(missing_cached_requests)
310 all_sent_url_requests.update(effective_uncached_requests)
311 303
312 # POST requests are known to be unable to use the cache. 304 # POST requests are known to be unable to use the cache.
313 expected_cached_requests.difference_update(effective_post_requests) 305 expected_cached_requests.difference_update(effective_post_requests)
314 expected_uncached_requests.update(effective_post_requests) 306 expected_uncached_requests.update(effective_post_requests)
315 307
316 _PrintUrlSetComparison(original_requests, effective_requests, 308 _PrintUrlSetComparison(self._original_requests, effective_requests,
317 'All resources') 309 'All resources')
318 _PrintUrlSetComparison(set(), effective_post_requests, 310 _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources')
319 'POST resources')
320 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, 311 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests,
321 'Cached resources') 312 'Cached resources')
322 _PrintUrlSetComparison(expected_uncached_requests, 313 _PrintUrlSetComparison(expected_uncached_requests,
323 effective_uncached_requests, 'Non cached resources') 314 effective_uncached_requests, 'Non cached resources')
324 315
325 # Verify requests from WPR. 316 self._all_sent_url_requests.update(effective_uncached_requests)
326 wpr_log_path = os.path.join(
327 benchmark_output_directory_path, sandwich_runner.WPR_LOG_FILENAME)
328 logging.info('verifying requests from %s' % wpr_log_path)
329 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path)
330 all_wpr_urls = set()
331 unserved_wpr_urls = set()
332 wpr_command_colliding_urls = set()
333 317
334 for request in all_wpr_requests: 318 def VerifyWprLog(self, wpr_log_path):
335 if request.is_wpr_host: 319 """Verifies WPR log with previously verified traces."""
336 continue 320 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path)
337 if urlparse(request.url).path.startswith('/web-page-replay'): 321 all_wpr_urls = set()
338 wpr_command_colliding_urls.add(request.url) 322 unserved_wpr_urls = set()
339 elif request.is_served is False: 323 wpr_command_colliding_urls = set()
340 unserved_wpr_urls.add(request.url)
341 all_wpr_urls.add(request.url)
342 324
343 _PrintUrlSetComparison(set(), unserved_wpr_urls, 325 for request in all_wpr_requests:
344 'Distinct unserved resources from WPR') 326 if request.is_wpr_host:
345 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, 327 continue
346 'Distinct resources colliding to WPR commands') 328 if urlparse(request.url).path.startswith('/web-page-replay'):
347 _PrintUrlSetComparison(all_wpr_urls, all_sent_url_requests, 329 wpr_command_colliding_urls.add(request.url)
348 'Distinct resource requests to WPR') 330 elif request.is_served is False:
331 unserved_wpr_urls.add(request.url)
332 all_wpr_urls.add(request.url)
349 333
350 334 _PrintUrlSetComparison(set(), unserved_wpr_urls,
351 def _ReadSubresourceFromRunnerOutputDir(runner_output_dir): 335 'Distinct unserved resources from WPR')
352 """Extracts a list of subresources in runner output directory. 336 _PrintUrlSetComparison(set(), wpr_command_colliding_urls,
353 337 'Distinct resources colliding to WPR commands')
354 Args: 338 _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests,
355 runner_output_dir: Path of the runner's output directory. 339 'Distinct resource requests to WPR')
356
357 Returns:
358 [URLs of sub-resources]
359 """
360 trace_path = os.path.join(
361 runner_output_dir, '0', sandwich_runner.TRACE_FILENAME)
362 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
363 url_set = set()
364 for request_event in _FilterOutDataAndIncompleteRequests(
365 trace.request_track.GetEvents()):
366 url_set.add(request_event.url)
367 logging.info('lists %s resources of %s from %s' % \
368 (len(url_set), trace.url, trace_path))
369 return [url for url in url_set]
370 340
371 341
372 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): 342 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
373 """Validates a cache archive content. 343 """Validates a cache archive content.
374 344
375 Args: 345 Args:
376 cache_build_trace_path: Path of the generated trace at the cache build time. 346 cache_build_trace_path: Path of the generated trace at the cache build time.
377 cache_archive_path: Cache archive's path to validate. 347 cache_archive_path: Cache archive's path to validate.
348
349 Returns:
350 {
351 'effective_requests': [URLs of all requests],
352 'effective_post_requests': [URLs of POST requests],
353 'expected_cached_resources': [URLs of resources expected to be cached]
pasko 2016/06/03 17:13:46 nit: comma at the end
gabadie 2016/06/06 09:43:15 Done.
354 'successfully_cached': [URLs of cached sub-resources]
355 }
378 """ 356 """
379 # TODO(gabadie): What's the best way of propagating errors happening in here? 357 # TODO(gabadie): What's the best way of propagating errors happening in here?
380 logging.info('lists cached urls from %s' % cache_archive_path) 358 logging.info('lists cached urls from %s' % cache_archive_path)
381 with common_util.TemporaryDirectory() as cache_directory: 359 with common_util.TemporaryDirectory() as cache_directory:
382 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) 360 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
383 cache_keys = set( 361 cache_keys = set(
384 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) 362 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
385 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) 363 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
386 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) 364 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
387 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) 365 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
(...skipping 10 matching lines...)
398 expected_cached_requests = effective_requests.difference( 376 expected_cached_requests = effective_requests.difference(
399 effective_post_requests) 377 effective_post_requests)
400 effective_cache_keys = cache_keys.difference( 378 effective_cache_keys = cache_keys.difference(
401 upload_data_stream_cache_entry_keys) 379 upload_data_stream_cache_entry_keys)
402 380
403 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, 381 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests,
404 'POST resources') 382 'POST resources')
405 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, 383 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
406 'Cached resources') 384 'Cached resources')
407 385
386 return {
387 'effective_requests': [url for url in effective_requests],
388 'effective_post_requests': [url for url in effective_post_requests],
389 'expected_cached_resources': [url for url in expected_cached_requests],
390 'successfully_cached_resources': [url for url in effective_cache_keys]
391 }
392
393
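For reference, a minimal sketch (not part of this CL) of how the cache-validation JSON written by the ValidatePatchedCache task below could be inspected. The file path matches the 'common/patched-cache-validation.json' task registered in this CL and the keys mirror the dictionary documented above; everything else is illustrative.

    import json

    # Hypothetical inspection snippet: load the validation result produced by
    # ValidatePatchedCache and report a couple of counts from it.
    with open('common/patched-cache-validation.json') as validation_file:
      cache_validation_result = json.load(validation_file)
    print('%d effective requests' %
          len(cache_validation_result['effective_requests']))
    print('%d resources successfully cached' %
          len(cache_validation_result['successfully_cached_resources']))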
394 def _ProcessRunOutputDir(
395 cache_validation_result, benchmark_setup, runner_output_dir):
396 """Process benchmark's run output directory.
397
398 Args:
399 cache_validation_result: Same as for _RunOutputVerifier
400 benchmark_setup: Same as for _RunOutputVerifier
401 runner_output_dir: Same as for SandwichRunner.output_dir
402
403 Returns:
 404 List of dictionaries.
405 """
406 run_metrics_list = []
407 run_output_verifier = _RunOutputVerifier(
408 cache_validation_result, benchmark_setup)
409 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
410 runner_output_dir):
411 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
412
413 logging.info('loading trace: %s', trace_path)
414 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
415
416 logging.info('verifying trace: %s', trace_path)
417 run_output_verifier.VerifyTrace(trace)
418
419 logging.info('extracting metrics from trace: %s', trace_path)
420 run_metrics = {
421 'url': trace.url,
422 'repeat_id': repeat_id,
423 'subresource_discoverer': benchmark_setup['subresource_discoverer'],
424 'cache_recording.subresource_count':
425 len(cache_validation_result['effective_requests']),
426 'cache_recording.cached_subresource_count_theoretic':
427 len(cache_validation_result['successfully_cached_resources']),
428 'cache_recording.cached_subresource_count':
429 len(cache_validation_result['expected_cached_resources']),
430 'benchmark_repeat.subresource_count': len(_ListUrlRequests(
431 trace, _RequestOutcome.All)),
432 'benchmark_repeat.served_from_cache_count_theoretic':
433 len(benchmark_setup['cache_whitelist']),
434 'benchmark_repeat.served_from_cache_count': len(_ListUrlRequests(
435 trace, _RequestOutcome.ServedFromCache)),
436 }
437 run_metrics.update(
438 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
439 repeat_dir, trace))
440 run_metrics_list.append(run_metrics)
441 run_metrics_list.sort(key=lambda e: e['repeat_id'])
442
443 wpr_log_path = os.path.join(
444 runner_output_dir, sandwich_runner.WPR_LOG_FILENAME)
445 logging.info('verifying wpr log: %s', wpr_log_path)
446 run_output_verifier.VerifyWprLog(wpr_log_path)
447 return run_metrics_list
448
408 449
409 class PrefetchBenchmarkBuilder(task_manager.Builder): 450 class PrefetchBenchmarkBuilder(task_manager.Builder):
410 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" 451 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""
411 452
412 def __init__(self, common_builder): 453 def __init__(self, common_builder):
413 task_manager.Builder.__init__(self, 454 task_manager.Builder.__init__(self,
414 common_builder.output_directory, 455 common_builder.output_directory,
415 common_builder.output_subdirectory) 456 common_builder.output_subdirectory)
416 self._common_builder = common_builder 457 self._common_builder = common_builder
417 458
418 self._patched_wpr_task = None 459 self._patched_wpr_task = None
419 self._reference_cache_task = None 460 self._cache_task = None
420 self._trace_from_grabbing_reference_cache = None 461 self._trace_from_grabbing_reference_cache = None
421 self._subresources_for_urls_task = None 462 self._cache_validation_task = None
pasko 2016/06/03 17:13:46 I would prefer to just keep a path here than the w
gabadie 2016/06/06 09:43:14 Done.
422 self._PopulateCommonPipelines() 463 self._PopulateCommonPipelines()
423 464
424 def _PopulateCommonPipelines(self): 465 def _PopulateCommonPipelines(self):
425 """Creates necessary tasks to produce initial cache archive. 466 """Creates necessary tasks to produce initial cache archive.
426 467
427 Also creates a task for producing a json file with a mapping of URLs to 468 Also creates a task for producing a json file with a mapping of URLs to
428 subresources (urls-resources.json). 469 subresources (urls-resources.json).
429 470
430 Here is the full dependency tree for the returned task: 471 Here is the full dependency tree for the returned task:
431 common/patched-cache-validation.log 472 common/patched-cache-validation.json
pasko 2016/06/03 17:13:46 probably should rename later to something like pat
gabadie 2016/06/06 09:43:14 Ok will make it in a separate CL.
432 depends on: common/patched-cache.zip 473 depends on: common/patched-cache.zip
433 depends on: common/original-cache.zip 474 depends on: common/original-cache.zip
434 depends on: common/webpages-patched.wpr 475 depends on: common/webpages-patched.wpr
435 depends on: common/webpages.wpr 476 depends on: common/webpages.wpr
436 depends on: common/urls-resources.json
437 depends on: common/original-cache.zip
438 """ 477 """
439 @self.RegisterTask('common/webpages-patched.wpr', 478 @self.RegisterTask('common/webpages-patched.wpr',
440 dependencies=[self._common_builder.original_wpr_task]) 479 dependencies=[self._common_builder.original_wpr_task])
441 def BuildPatchedWpr(): 480 def BuildPatchedWpr():
442 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) 481 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
443 shutil.copyfile( 482 shutil.copyfile(
444 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) 483 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
445 _PatchWpr(BuildPatchedWpr.path) 484 _PatchWpr(BuildPatchedWpr.path)
446 485
447 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) 486 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
448 def BuildOriginalCache(): 487 def BuildOriginalCache():
449 runner = self._common_builder.CreateSandwichRunner() 488 runner = self._common_builder.CreateSandwichRunner()
450 runner.wpr_archive_path = BuildPatchedWpr.path 489 runner.wpr_archive_path = BuildPatchedWpr.path
451 runner.cache_archive_path = BuildOriginalCache.path 490 runner.cache_archive_path = BuildOriginalCache.path
452 runner.cache_operation = sandwich_runner.CacheOperation.SAVE 491 runner.cache_operation = sandwich_runner.CacheOperation.SAVE
453 runner.output_dir = BuildOriginalCache.run_path 492 runner.output_dir = BuildOriginalCache.run_path
454 runner.Run() 493 runner.Run()
455 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' 494 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
456 original_cache_trace_path = os.path.join( 495 original_cache_trace_path = os.path.join(
457 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) 496 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
458 497
459 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) 498 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
460 def BuildPatchedCache(): 499 def BuildPatchedCache():
461 _PatchCacheArchive(BuildOriginalCache.path, 500 _PatchCacheArchive(BuildOriginalCache.path,
462 original_cache_trace_path, BuildPatchedCache.path) 501 original_cache_trace_path, BuildPatchedCache.path)
463 502
464 @self.RegisterTask('common/subresources-for-urls.json', 503 @self.RegisterTask('common/patched-cache-validation.json',
465 [BuildOriginalCache])
466 def ListUrlsResources():
467 url_resources = _ReadSubresourceFromRunnerOutputDir(
468 BuildOriginalCache.run_path)
469 with open(ListUrlsResources.path, 'w') as output:
470 json.dump(url_resources, output)
471
472 @self.RegisterTask('common/patched-cache-validation.log',
473 [BuildPatchedCache]) 504 [BuildPatchedCache])
474 def ValidatePatchedCache(): 505 def ValidatePatchedCache():
475 handler = logging.FileHandler(ValidatePatchedCache.path) 506 cache_validation_result = _ValidateCacheArchiveContent(
476 logging.getLogger().addHandler(handler) 507 original_cache_trace_path, BuildPatchedCache.path)
477 try: 508 with open(ValidatePatchedCache.path, 'w') as output:
478 _ValidateCacheArchiveContent( 509 json.dump(cache_validation_result, output)
479 original_cache_trace_path, BuildPatchedCache.path)
480 finally:
481 logging.getLogger().removeHandler(handler)
482 510
483 self._patched_wpr_task = BuildPatchedWpr 511 self._patched_wpr_task = BuildPatchedWpr
484 self._trace_from_grabbing_reference_cache = original_cache_trace_path 512 self._trace_from_grabbing_reference_cache = original_cache_trace_path
485 self._reference_cache_task = BuildPatchedCache 513 self._cache_task = BuildPatchedCache
486 self._subresources_for_urls_task = ListUrlsResources 514 self._cache_validation_task = ValidatePatchedCache
487 515
488 self._common_builder.default_final_tasks.append(ValidatePatchedCache) 516 self._common_builder.default_final_tasks.append(ValidatePatchedCache)
489 517
490 def PopulateLoadBenchmark(self, subresource_discoverer, 518 def PopulateLoadBenchmark(self, subresource_discoverer,
491 transformer_list_name, transformer_list): 519 transformer_list_name, transformer_list):
492 """Populate benchmarking tasks from its setup tasks. 520 """Populate benchmarking tasks from its setup tasks.
493 521
494 Args: 522 Args:
495 subresource_discoverer: Name of a subresources discoverer. 523 subresource_discoverer: Name of a subresources discoverer.
496 transformer_list_name: A string describing the transformers, will be used 524 transformer_list_name: A string describing the transformers, will be used
497 in Task names (prefer names without spaces and special characters). 525 in Task names (prefer names without spaces and special characters).
498 transformer_list: An ordered list of function that takes an instance of 526 transformer_list: An ordered list of function that takes an instance of
499 SandwichRunner as parameter, would be applied immediately before 527 SandwichRunner as parameter, would be applied immediately before
500 SandwichRunner.Run() in the given order. 528 SandwichRunner.Run() in the given order.
501 529
502 Here is the full dependency of the added tree for the returned task: 530 Here is the full dependency of the added tree for the returned task:
503 <transformer_list_name>/<subresource_discoverer>-metrics.csv 531 <transformer_list_name>/<subresource_discoverer>-metrics.csv
504 depends on: <transformer_list_name>/<subresource_discoverer>-run/ 532 depends on: <transformer_list_name>/<subresource_discoverer>-run/
505 depends on: common/<subresource_discoverer>-cache.zip 533 depends on: common/<subresource_discoverer>-cache.zip
506 depends on: some tasks saved by PopulateCommonPipelines() 534 depends on: some tasks saved by PopulateCommonPipelines()
pasko 2016/06/03 17:13:46 only depends on -setup.json, right?
gabadie 2016/06/06 09:43:14 Done.
507 depends on: common/<subresource_discoverer>-setup.json 535 depends on: common/<subresource_discoverer>-setup.json
508 depends on: some tasks saved by PopulateCommonPipelines() 536 depends on: some tasks saved by PopulateCommonPipelines()
pasko 2016/06/03 17:13:46 some? should it just say common/patched-cache-vali
gabadie 2016/06/06 09:43:14 Done.
509 """ 537 """
510 additional_column_names = [ 538 additional_column_names = [
511 'url', 539 'url',
512 'repeat_id', 540 'repeat_id',
513 'subresource_discoverer', 541 'subresource_discoverer',
514 'subresource_count', 542 'cache_recording.subresource_count',
515 # The amount of subresources detected at SetupBenchmark step. 543 'cache_recording.cached_subresource_count_theoretic',
516 'subresource_count_theoretic', 544 'cache_recording.cached_subresource_count',
517 # Amount of subresources for caching as suggested by the subresource 545 'benchmark_repeat.subresource_count',
pasko 2016/06/03 17:13:46 'benchmark_repeat' sounds more like a command than
gabadie 2016/06/06 09:43:15 Done.
518 # discoverer. 546 'benchmark_repeat.served_from_cache_count_theoretic',
519 'cached_subresource_count_theoretic', 547 'benchmark_repeat.served_from_cache_count']
520 'cached_subresource_count']
521 548
522 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS 549 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
523 assert 'common' not in SUBRESOURCE_DISCOVERERS 550 assert 'common' not in SUBRESOURCE_DISCOVERERS
524 shared_task_prefix = os.path.join('common', subresource_discoverer) 551 shared_task_prefix = os.path.join('common', subresource_discoverer)
525 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) 552 task_prefix = os.path.join(transformer_list_name, subresource_discoverer)
526 553
527 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, 554 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
528 dependencies=[self._subresources_for_urls_task]) 555 dependencies=[self._cache_validation_task])
529 def SetupBenchmark(): 556 def SetupBenchmark():
530 whitelisted_urls = _ExtractDiscoverableUrls( 557 whitelisted_urls = _ExtractDiscoverableUrls(
531 self._trace_from_grabbing_reference_cache, subresource_discoverer) 558 self._trace_from_grabbing_reference_cache, subresource_discoverer)
532 559
533 url_resources = json.load(open(self._subresources_for_urls_task.path))
534 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) 560 common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
535 with open(SetupBenchmark.path, 'w') as output: 561 with open(SetupBenchmark.path, 'w') as output:
536 json.dump({ 562 json.dump({
537 'cache_whitelist': [url for url in whitelisted_urls], 563 'cache_whitelist': [url for url in whitelisted_urls],
538 'subresource_discoverer': subresource_discoverer, 564 'subresource_discoverer': subresource_discoverer,
539 'url_resources': url_resources,
540 }, output) 565 }, output)
541 566
542 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, 567 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
543 dependencies=[ 568 dependencies=[SetupBenchmark])
544 SetupBenchmark, self._reference_cache_task])
545 def BuildBenchmarkCacheArchive(): 569 def BuildBenchmarkCacheArchive():
546 setup = json.load(open(SetupBenchmark.path)) 570 benchmark_setup = json.load(open(SetupBenchmark.path))
547 chrome_cache.ApplyUrlWhitelistToCacheArchive( 571 chrome_cache.ApplyUrlWhitelistToCacheArchive(
548 cache_archive_path=self._reference_cache_task.path, 572 cache_archive_path=self._cache_task.path,
549 whitelisted_urls=setup['cache_whitelist'], 573 whitelisted_urls=benchmark_setup['cache_whitelist'],
550 output_cache_archive_path=BuildBenchmarkCacheArchive.path) 574 output_cache_archive_path=BuildBenchmarkCacheArchive.path)
551 575
552 @self.RegisterTask(task_prefix + '-run/', 576 @self.RegisterTask(task_prefix + '-run/',
553 dependencies=[BuildBenchmarkCacheArchive]) 577 dependencies=[BuildBenchmarkCacheArchive])
554 def RunBenchmark(): 578 def RunBenchmark():
555 runner = self._common_builder.CreateSandwichRunner() 579 runner = self._common_builder.CreateSandwichRunner()
556 for transformer in transformer_list: 580 for transformer in transformer_list:
557 transformer(runner) 581 transformer(runner)
558 runner.wpr_archive_path = self._patched_wpr_task.path 582 runner.wpr_archive_path = self._patched_wpr_task.path
559 runner.wpr_out_log_path = os.path.join( 583 runner.wpr_out_log_path = os.path.join(
560 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) 584 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
561 runner.cache_archive_path = BuildBenchmarkCacheArchive.path 585 runner.cache_archive_path = BuildBenchmarkCacheArchive.path
562 runner.cache_operation = sandwich_runner.CacheOperation.PUSH 586 runner.cache_operation = sandwich_runner.CacheOperation.PUSH
563 runner.output_dir = RunBenchmark.path 587 runner.output_dir = RunBenchmark.path
564 runner.Run() 588 runner.Run()
565 589
566 @self.RegisterTask(task_prefix + '-metrics.csv', 590 @self.RegisterTask(task_prefix + '-metrics.csv',
567 dependencies=[RunBenchmark]) 591 dependencies=[RunBenchmark])
568 def ExtractMetrics(): 592 def ProcessRunOutputDir():
569 # TODO(gabadie): Performance improvement: load each trace only once and 593 benchmark_setup = json.load(open(SetupBenchmark.path))
570 # use it for validation and extraction of metrics later. 594 cache_validation_result = json.load(
571 _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path) 595 open(self._cache_validation_task.path))
572 596
573 benchmark_setup = json.load(open(SetupBenchmark.path)) 597 run_metrics_list = _ProcessRunOutputDir(
574 run_metrics_list = [] 598 cache_validation_result, benchmark_setup, RunBenchmark.path)
575 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( 599 with open(ProcessRunOutputDir.path, 'w') as csv_file:
576 RunBenchmark.path):
577 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
578 logging.info('processing trace: %s', trace_path)
579 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
580 run_metrics = {
581 'url': trace.url,
582 'repeat_id': repeat_id,
583 'subresource_discoverer': benchmark_setup['subresource_discoverer'],
584 'subresource_count': len(_ListUrlRequests(
585 trace, _RequestOutcome.All)),
586 'subresource_count_theoretic':
587 len(benchmark_setup['url_resources']),
588 'cached_subresource_count': len(_ListUrlRequests(
589 trace, _RequestOutcome.ServedFromCache)),
590 'cached_subresource_count_theoretic':
591 len(benchmark_setup['cache_whitelist']),
592 }
593 run_metrics.update(
594 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
595 repeat_dir, trace))
596 run_metrics_list.append(run_metrics)
597
598 run_metrics_list.sort(key=lambda e: e['repeat_id'])
599 with open(ExtractMetrics.path, 'w') as csv_file:
600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + 600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) 601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
602 writer.writeheader() 602 writer.writeheader()
603 for trace_metrics in run_metrics_list: 603 for trace_metrics in run_metrics_list:
604 writer.writerow(trace_metrics) 604 writer.writerow(trace_metrics)
605 605
606 self._common_builder.default_final_tasks.append(ExtractMetrics) 606 self._common_builder.default_final_tasks.append(ProcessRunOutputDir)
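
To close, a sketch (not part of the patch) of how this builder would typically be wired, based only on the signatures visible in this diff; names not shown here are assumptions.

    # Hypothetical driver sketch: common_builder stands for an existing common
    # builder object (it must provide output_directory, output_subdirectory,
    # original_wpr_task, CreateSandwichRunner() and default_final_tasks, as used
    # above); the transformer list name and empty transformer list are made up.
    builder = PrefetchBenchmarkBuilder(common_builder)
    builder.PopulateLoadBenchmark(
        subresource_discoverer=HTML_PRELOAD_SCANNER_DISCOVERER,
        transformer_list_name='default',
        transformer_list=[])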