| Index: tools/android/loading/sandwich_prefetch.py
|
| diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_prefetch.py
|
| similarity index 58%
|
| rename from tools/android/loading/sandwich_misc.py
|
| rename to tools/android/loading/sandwich_prefetch.py
|
| index 172f4e40b0552eeedd797d36d4f1f72c18a39932..173d0bffbb1d820583bc2f808fe2b453ee634c7f 100644
|
| --- a/tools/android/loading/sandwich_misc.py
|
| +++ b/tools/android/loading/sandwich_prefetch.py
|
| @@ -2,18 +2,22 @@
|
| # Use of this source code is governed by a BSD-style license that can be
|
| # found in the LICENSE file.
|
|
|
| +import csv
|
| import logging
|
| import json
|
| import os
|
| import re
|
| +import shutil
|
| from urlparse import urlparse
|
|
|
| import chrome_cache
|
| import common_util
|
| -from loading_trace import LoadingTrace
|
| +import loading_trace
|
| from prefetch_view import PrefetchSimulationView
|
| from request_dependencies_lens import RequestDependencyLens
|
| +import sandwich_metrics
|
| import sandwich_runner
|
| +import task_manager
|
| import wpr_backend
|
|
|
|
|
| @@ -43,7 +47,7 @@ SUBRESOURCE_DISCOVERERS = set([
|
| _UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')
|
|
|
|
|
| -def PatchWpr(wpr_archive_path):
|
| +def _PatchWpr(wpr_archive_path):
|
| """Patches a WPR archive to get all resources into the HTTP cache and avoid
|
| invalidations and revalidations.
|
|
|
| @@ -104,8 +108,8 @@ def _FilterOutDataAndIncompleteRequests(requests):
|
| yield request
|
|
|
|
|
| -def PatchCacheArchive(cache_archive_path, loading_trace_path,
|
| - cache_archive_dest_path):
|
| +def _PatchCacheArchive(cache_archive_path, loading_trace_path,
|
| + cache_archive_dest_path):
|
| """Patch the cache archive.
|
|
|
| Note: This method update the raw response headers of cache entries' to store
|
| @@ -119,7 +123,7 @@ def PatchCacheArchive(cache_archive_path, loading_trace_path,
|
| archive <cache_archive_path>.
|
| cache_archive_dest_path: Archive destination's path.
|
| """
|
| - trace = LoadingTrace.FromJsonFile(loading_trace_path)
|
| + trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
|
| with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
|
| cache_path = os.path.join(tmp_path, 'cache')
|
| chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
|
| @@ -154,7 +158,7 @@ def PatchCacheArchive(cache_archive_path, loading_trace_path,
|
| logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
|
|
|
|
|
| -def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
|
| +def _ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
|
| """Extracts discoverable resource urls from a loading trace according to a
|
| sub-resource discoverer.
|
|
|
| @@ -171,7 +175,7 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
|
|
|
| # Load trace and related infos.
|
| logging.info('loading %s' % loading_trace_path)
|
| - trace = LoadingTrace.FromJsonFile(loading_trace_path)
|
| + trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
|
| dependencies_lens = RequestDependencyLens(trace)
|
| first_resource_request = trace.request_track.GetFirstResourceRequest()
|
|
|
| @@ -226,16 +230,16 @@ def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
|
| logging.error('+ ' + url)
|
|
|
|
|
| -class RequestOutcome:
|
| +class _RequestOutcome:
|
| All, ServedFromCache, NotServedFromCache, Post = range(4)
|
|
|
|
|
| -def ListUrlRequests(trace, request_kind):
|
| +def _ListUrlRequests(trace, request_kind):
|
| """Lists requested URLs from a trace.
|
|
|
| Args:
|
| - trace: (LoadingTrace) loading trace.
|
| - request_kind: RequestOutcome indicating the subset of requests to output.
|
| + trace: (loading_trace.LoadingTrace) loading trace.
|
| + request_kind: _RequestOutcome indicating the subset of requests to output.
|
|
|
| Returns:
|
| set([str])
|
| @@ -243,22 +247,22 @@ def ListUrlRequests(trace, request_kind):
|
| urls = set()
|
| for request_event in _FilterOutDataAndIncompleteRequests(
|
| trace.request_track.GetEvents()):
|
| - if (request_kind == RequestOutcome.ServedFromCache and
|
| + if (request_kind == _RequestOutcome.ServedFromCache and
|
| request_event.from_disk_cache):
|
| urls.add(request_event.url)
|
| - elif (request_kind == RequestOutcome.Post and
|
| + elif (request_kind == _RequestOutcome.Post and
|
| request_event.method.upper().strip() == 'POST'):
|
| urls.add(request_event.url)
|
| - elif (request_kind == RequestOutcome.NotServedFromCache and
|
| + elif (request_kind == _RequestOutcome.NotServedFromCache and
|
| not request_event.from_disk_cache):
|
| urls.add(request_event.url)
|
| - elif request_kind == RequestOutcome.All:
|
| + elif request_kind == _RequestOutcome.All:
|
| urls.add(request_event.url)
|
| return urls
|
|
|
|
|
| -def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
|
| - benchmark_output_directory_path):
|
| +def _VerifyBenchmarkOutputDirectory(benchmark_setup_path,
|
| + benchmark_output_directory_path):
|
| """Verifies that all run inside the run_output_directory worked as expected.
|
|
|
| Args:
|
| @@ -285,15 +289,15 @@ def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
|
| if not os.path.isfile(trace_path):
|
| logging.error('missing trace %s' % trace_path)
|
| continue
|
| - trace = LoadingTrace.FromJsonFile(trace_path)
|
| + trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
|
| logging.info('verifying %s from %s' % (trace.url, trace_path))
|
|
|
| - effective_requests = ListUrlRequests(trace, RequestOutcome.All)
|
| - effective_post_requests = ListUrlRequests(trace, RequestOutcome.Post)
|
| + effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
|
| + effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
|
| effective_cached_requests = \
|
| - ListUrlRequests(trace, RequestOutcome.ServedFromCache)
|
| + _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
|
| effective_uncached_requests = \
|
| - ListUrlRequests(trace, RequestOutcome.NotServedFromCache)
|
| + _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
|
|
|
| missing_requests = original_requests.difference(effective_requests)
|
| unexpected_requests = effective_requests.difference(original_requests)
|
| @@ -344,7 +348,7 @@ def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
|
| 'Distinct resource requests to WPR')
|
|
|
|
|
| -def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
|
| +def _ReadSubresourceFromRunnerOutputDir(runner_output_dir):
|
| """Extracts a list of subresources in runner output directory.
|
|
|
| Args:
|
| @@ -355,7 +359,7 @@ def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
|
| """
|
| trace_path = os.path.join(
|
| runner_output_dir, '0', sandwich_runner.TRACE_FILENAME)
|
| - trace = LoadingTrace.FromJsonFile(trace_path)
|
| + trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
|
| url_set = set()
|
| for request_event in _FilterOutDataAndIncompleteRequests(
|
| trace.request_track.GetEvents()):
|
| @@ -365,7 +369,7 @@ def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
|
| return [url for url in url_set]
|
|
|
|
|
| -def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
|
| +def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
|
| """Validates a cache archive content.
|
|
|
| Args:
|
| @@ -378,9 +382,9 @@ def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
|
| chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
|
| cache_keys = set(
|
| chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
|
| - trace = LoadingTrace.FromJsonFile(cache_build_trace_path)
|
| - effective_requests = ListUrlRequests(trace, RequestOutcome.All)
|
| - effective_post_requests = ListUrlRequests(trace, RequestOutcome.Post)
|
| + trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
|
| + effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
|
| + effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
|
|
|
| upload_data_stream_cache_entry_keys = set()
|
| upload_data_stream_requests = set()
|
| @@ -400,3 +404,203 @@ def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
|
| 'POST resources')
|
| _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
|
| 'Cached resources')
|
| +
|
| +
|
| +class PrefetchBenchmarkBuilder(task_manager.Builder):
|
| + """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""
|
| +
|
| + def __init__(self, common_builder):
|
| + task_manager.Builder.__init__(self,
|
| + common_builder.output_directory,
|
| + common_builder.output_subdirectory)
|
| + self._common_builder = common_builder
|
| +
|
| + self._patched_wpr_task = None
|
| + self._reference_cache_task = None
|
| + self._trace_from_grabbing_reference_cache = None
|
| + self._subresources_for_urls_task = None
|
| + self._PopulateCommonPipelines()
|
| +
|
| + def _PopulateCommonPipelines(self):
|
| + """Creates necessary tasks to produce initial cache archive.
|
| +
|
| +    Also creates a task that produces a JSON file mapping URLs to
|
| +    subresources (subresources-for-urls.json).
|
| +
|
| + Here is the full dependency tree for the returned task:
|
| + common/patched-cache-validation.log
|
| + depends on: common/patched-cache.zip
|
| + depends on: common/original-cache.zip
|
| + depends on: common/webpages-patched.wpr
|
| + depends on: common/webpages.wpr
|
| +      depends on: common/subresources-for-urls.json
|
| + depends on: common/original-cache.zip
|
| + """
|
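| +    # RegisterTask turns each function below into a build task: the first
|
| +    # argument names the build product (exposed afterwards as the function's
|
| +    # .path attribute) and the listed dependencies run first.
|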
| + @self.RegisterTask('common/webpages-patched.wpr',
|
| + dependencies=[self._common_builder.original_wpr_task])
|
| + def BuildPatchedWpr():
|
| + common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
|
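| +      # Copy the original WPR archive and patch the copy in place, leaving
|
| +      # the source archive untouched.
|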
| + shutil.copyfile(
|
| + self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
|
| + _PatchWpr(BuildPatchedWpr.path)
|
| +
|
| + @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
|
| + def BuildOriginalCache():
|
| + runner = self._common_builder.CreateSandwichRunner()
|
| + runner.wpr_archive_path = BuildPatchedWpr.path
|
| + runner.cache_archive_path = BuildOriginalCache.path
|
| + runner.cache_operation = sandwich_runner.CacheOperation.SAVE
|
| + runner.output_dir = BuildOriginalCache.run_path
|
| + runner.Run()
|
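| +    # Derive the run output directory by stripping the '.zip' suffix. This
|
| +    # attribute is set before the task graph executes, so the closure above
|
| +    # can safely read it at run time.
|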
| + BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
|
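| +    # The trace of the first repeat ('0') feeds the cache patching and
|
| +    # validation tasks below.
|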
| + original_cache_trace_path = os.path.join(
|
| + BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
|
| +
|
| + @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
|
| + def BuildPatchedCache():
|
| + _PatchCacheArchive(BuildOriginalCache.path,
|
| + original_cache_trace_path, BuildPatchedCache.path)
|
| +
|
| + @self.RegisterTask('common/subresources-for-urls.json',
|
| + [BuildOriginalCache])
|
| + def ListUrlsResources():
|
| + url_resources = _ReadSubresourceFromRunnerOutputDir(
|
| + BuildOriginalCache.run_path)
|
| + with open(ListUrlsResources.path, 'w') as output:
|
| + json.dump(url_resources, output)
|
| +
|
| + @self.RegisterTask('common/patched-cache-validation.log',
|
| + [BuildPatchedCache])
|
| + def ValidatePatchedCache():
|
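| +      # Temporarily attach a file handler so the validation's log output is
|
| +      # captured in patched-cache-validation.log.
|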
| + handler = logging.FileHandler(ValidatePatchedCache.path)
|
| + logging.getLogger().addHandler(handler)
|
| + try:
|
| + _ValidateCacheArchiveContent(
|
| + original_cache_trace_path, BuildPatchedCache.path)
|
| + finally:
|
| + logging.getLogger().removeHandler(handler)
|
| +
|
| + self._patched_wpr_task = BuildPatchedWpr
|
| + self._trace_from_grabbing_reference_cache = original_cache_trace_path
|
| + self._reference_cache_task = BuildPatchedCache
|
| + self._subresources_for_urls_task = ListUrlsResources
|
| +
|
| + self._common_builder.default_final_tasks.append(ValidatePatchedCache)
|
| +
|
| + def PopulateLoadBenchmark(self, subresource_discoverer,
|
| + transformer_list_name, transformer_list):
|
| + """Populate benchmarking tasks from its setup tasks.
|
| +
|
| + Args:
|
| +      subresource_discoverer: Name of a subresource discoverer.
|
| +      transformer_list_name: A string describing the transformers; it is used
|
| +        in task names (prefer names without spaces or special characters).
|
| +      transformer_list: An ordered list of functions that each take a
|
| +        SandwichRunner instance as parameter; they are applied in the given
|
| +        order immediately before SandwichRunner.Run().
|
| +
|
| +    Here is the full dependency tree added for the returned task:
|
| + <transformer_list_name>/<subresource_discoverer>-metrics.csv
|
| + depends on: <transformer_list_name>/<subresource_discoverer>-run/
|
| + depends on: common/<subresource_discoverer>-cache.zip
|
| +        depends on: some tasks saved by _PopulateCommonPipelines()
|
| + depends on: common/<subresource_discoverer>-setup.json
|
| +        depends on: some tasks saved by _PopulateCommonPipelines()
|
| + """
|
| + additional_column_names = [
|
| + 'url',
|
| + 'repeat_id',
|
| + 'subresource_discoverer',
|
| + 'subresource_count',
|
| +      # Number of subresources detected at the SetupBenchmark step.
|
| + 'subresource_count_theoretic',
|
| +      # Number of subresources to cache, as suggested by the subresource
|
| +      # discoverer.
|
| + 'cached_subresource_count_theoretic',
|
| + 'cached_subresource_count']
|
| +
|
| + assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
|
| + assert 'common' not in SUBRESOURCE_DISCOVERERS
|
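| +    # Shared setup tasks are namespaced under 'common/', so no discoverer
|
| +    # may itself be named 'common'.
|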
| + shared_task_prefix = os.path.join('common', subresource_discoverer)
|
| + task_prefix = os.path.join(transformer_list_name, subresource_discoverer)
|
| +
|
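| +    # merge=True: every transformer list registers this same shared task;
|
| +    # duplicate registrations collapse into a single task under common/.
|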
| + @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
|
| + dependencies=[self._subresources_for_urls_task])
|
| + def SetupBenchmark():
|
| + whitelisted_urls = _ExtractDiscoverableUrls(
|
| + self._trace_from_grabbing_reference_cache, subresource_discoverer)
|
| +
|
| + url_resources = json.load(open(self._subresources_for_urls_task.path))
|
| + common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
|
| + with open(SetupBenchmark.path, 'w') as output:
|
| + json.dump({
|
| + 'cache_whitelist': [url for url in whitelisted_urls],
|
| + 'subresource_discoverer': subresource_discoverer,
|
| + 'url_resources': url_resources,
|
| + }, output)
|
| +
|
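| +    # Shrink the reference cache to the discoverer's whitelist: only cache
|
| +    # entries whose URL is whitelisted survive into the benchmark archive.
|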
| + @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
|
| + dependencies=[
|
| + SetupBenchmark, self._reference_cache_task])
|
| + def BuildBenchmarkCacheArchive():
|
| + setup = json.load(open(SetupBenchmark.path))
|
| + chrome_cache.ApplyUrlWhitelistToCacheArchive(
|
| + cache_archive_path=self._reference_cache_task.path,
|
| + whitelisted_urls=setup['cache_whitelist'],
|
| + output_cache_archive_path=BuildBenchmarkCacheArchive.path)
|
| +
|
| + @self.RegisterTask(task_prefix + '-run/',
|
| + dependencies=[BuildBenchmarkCacheArchive])
|
| + def RunBenchmark():
|
| + runner = self._common_builder.CreateSandwichRunner()
|
| + for transformer in transformer_list:
|
| + transformer(runner)
|
| + runner.wpr_archive_path = self._patched_wpr_task.path
|
| + runner.wpr_out_log_path = os.path.join(
|
| + RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
|
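| +      # PUSH seeds the browser cache from the archive before the page load,
|
| +      # the inverse of the SAVE operation used when building the cache.
|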
| + runner.cache_archive_path = BuildBenchmarkCacheArchive.path
|
| + runner.cache_operation = sandwich_runner.CacheOperation.PUSH
|
| + runner.output_dir = RunBenchmark.path
|
| + runner.Run()
|
| +
|
| + @self.RegisterTask(task_prefix + '-metrics.csv',
|
| + dependencies=[RunBenchmark])
|
| + def ExtractMetrics():
|
| + # TODO(gabadie): Performance improvement: load each trace only once and
|
| + # use it for validation and extraction of metrics later.
|
| + _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path)
|
| +
|
| + benchmark_setup = json.load(open(SetupBenchmark.path))
|
| + run_metrics_list = []
|
| + for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
|
| + RunBenchmark.path):
|
| + trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
|
| + logging.info('processing trace: %s', trace_path)
|
| + trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
|
| + run_metrics = {
|
| + 'url': trace.url,
|
| + 'repeat_id': repeat_id,
|
| + 'subresource_discoverer': benchmark_setup['subresource_discoverer'],
|
| + 'subresource_count': len(_ListUrlRequests(
|
| + trace, _RequestOutcome.All)),
|
| + 'subresource_count_theoretic':
|
| + len(benchmark_setup['url_resources']),
|
| + 'cached_subresource_count': len(_ListUrlRequests(
|
| + trace, _RequestOutcome.ServedFromCache)),
|
| + 'cached_subresource_count_theoretic':
|
| + len(benchmark_setup['cache_whitelist']),
|
| + }
|
| + run_metrics.update(
|
| + sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
|
| + repeat_dir, trace))
|
| + run_metrics_list.append(run_metrics)
|
| +
|
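| +      # Sort by repeat_id so CSV rows appear in run order.
|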
| + run_metrics_list.sort(key=lambda e: e['repeat_id'])
|
| + with open(ExtractMetrics.path, 'w') as csv_file:
|
| + writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
|
| + sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
|
| + writer.writeheader()
|
| + for trace_metrics in run_metrics_list:
|
| + writer.writerow(trace_metrics)
|
| +
|
| + self._common_builder.default_final_tasks.append(ExtractMetrics)
|
|
|