Index: tools/android/loading/sandwich_prefetch.py
diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_prefetch.py
similarity index 58%
rename from tools/android/loading/sandwich_misc.py
rename to tools/android/loading/sandwich_prefetch.py
index 172f4e40b0552eeedd797d36d4f1f72c18a39932..173d0bffbb1d820583bc2f808fe2b453ee634c7f 100644
--- a/tools/android/loading/sandwich_misc.py
+++ b/tools/android/loading/sandwich_prefetch.py
@@ -2,18 +2,22 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+import csv
 import logging
 import json
 import os
 import re
+import shutil
 from urlparse import urlparse
 import chrome_cache
 import common_util
-from loading_trace import LoadingTrace
+import loading_trace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
+import sandwich_metrics
 import sandwich_runner
+import task_manager
 import wpr_backend
@@ -43,7 +47,7 @@ SUBRESOURCE_DISCOVERERS = set([
 _UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')
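# Editorial aside (not part of the CL): what the key regex above matches.
# The validation code below treats cache keys of the form
# '<numeric upload stream id>/<url>' as POST upload-data-stream entries:
#   m = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match('1/https://a.test/post')
#   m.group('url')  # -> 'https://a.test/post'
#   _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match('https://a.test/get')  # -> None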
-def PatchWpr(wpr_archive_path):
+def _PatchWpr(wpr_archive_path):
   """Patches a WPR archive to get all resources into the HTTP cache and avoid
   invalidation and revalidations.
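# Editorial sketch (not part of the CL; wpr_backend's API is not shown in this
# diff). The idea behind _PatchWpr is to rewrite every archived response so the
# HTTP cache keeps it fresh and never revalidates it. On a plain header dict,
# that could look like (max-age value is an assumption):
def _make_response_cacheable(headers):
  headers['cache-control'] = 'public, max-age=31536000'
  headers.pop('etag', None)           # Drop validators so the cache never
  headers.pop('last-modified', None)  # issues conditional revalidations.
  return headers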
@@ -104,8 +108,8 @@ def _FilterOutDataAndIncompleteRequests(requests):
     yield request
-def PatchCacheArchive(cache_archive_path, loading_trace_path,
-                      cache_archive_dest_path):
+def _PatchCacheArchive(cache_archive_path, loading_trace_path,
+                       cache_archive_dest_path):
   """Patch the cache archive.
   Note: This method updates the raw response headers of cache entries to store
@@ -119,7 +123,7 @@ def PatchCacheArchive(cache_archive_path, loading_trace_path,
       archive <cache_archive_path>.
     cache_archive_dest_path: Archive destination's path.
   """
-  trace = LoadingTrace.FromJsonFile(loading_trace_path)
+  trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
     cache_path = os.path.join(tmp_path, 'cache')
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
@@ -154,7 +158,7 @@ def PatchCacheArchive(cache_archive_path, loading_trace_path,
   logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
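# Editorial sketch (not part of the CL) of the unzip-and-inspect pattern used
# by _PatchCacheArchive and _ValidateCacheArchiveContent. Only
# UnzipDirectoryContent, CacheBackend, ListKeys and GetSize appear in this
# diff; any other chrome_cache helper would be an assumption.
def _list_cache_archive_keys(cache_archive_path):
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
    cache_path = os.path.join(tmp_path, 'cache')
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
    return set(chrome_cache.CacheBackend(cache_path, 'simple').ListKeys())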
-def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
+def _ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
   """Extracts discoverable resource URLs from a loading trace according to a
   subresource discoverer.
@@ -171,7 +175,7 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
   # Load trace and related infos.
   logging.info('loading %s' % loading_trace_path)
-  trace = LoadingTrace.FromJsonFile(loading_trace_path)
+  trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   dependencies_lens = RequestDependencyLens(trace)
   first_resource_request = trace.request_track.GetFirstResourceRequest()
@@ -226,16 +230,16 @@ def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
       logging.error('+ ' + url)
-class RequestOutcome:
+class _RequestOutcome:
   All, ServedFromCache, NotServedFromCache, Post = range(4)
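# Editorial note (not part of the CL): the line above is the pre-`enum`
# Python 2 idiom for integer constants; unpacking range(4) yields:
#   _RequestOutcome.All == 0, _RequestOutcome.ServedFromCache == 1,
#   _RequestOutcome.NotServedFromCache == 2, _RequestOutcome.Post == 3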
-def ListUrlRequests(trace, request_kind):
+def _ListUrlRequests(trace, request_kind):
   """Lists requested URLs from a trace.
   Args:
-    trace: (LoadingTrace) loading trace.
-    request_kind: RequestOutcome indicating the subset of requests to output.
+    trace: (loading_trace.LoadingTrace) loading trace.
+    request_kind: _RequestOutcome indicating the subset of requests to output.
   Returns:
     set([str])
@@ -243,22 +247,22 @@ def ListUrlRequests(trace, request_kind):
   urls = set()
   for request_event in _FilterOutDataAndIncompleteRequests(
       trace.request_track.GetEvents()):
-    if (request_kind == RequestOutcome.ServedFromCache and
+    if (request_kind == _RequestOutcome.ServedFromCache and
         request_event.from_disk_cache):
       urls.add(request_event.url)
-    elif (request_kind == RequestOutcome.Post and
+    elif (request_kind == _RequestOutcome.Post and
           request_event.method.upper().strip() == 'POST'):
       urls.add(request_event.url)
-    elif (request_kind == RequestOutcome.NotServedFromCache and
+    elif (request_kind == _RequestOutcome.NotServedFromCache and
           not request_event.from_disk_cache):
       urls.add(request_event.url)
-    elif request_kind == RequestOutcome.All:
+    elif request_kind == _RequestOutcome.All:
       urls.add(request_event.url)
   return urls
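# Editorial example (not part of the CL): a typical consumer of
# _ListUrlRequests computes a per-trace cache hit ratio (trace_path here is
# hypothetical; traces are loaded the same way elsewhere in this file):
#   trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
#   all_urls = _ListUrlRequests(trace, _RequestOutcome.All)
#   cached = _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
#   hit_ratio = float(len(cached)) / len(all_urls) if all_urls else 0.0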
-def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
-                                   benchmark_output_directory_path):
+def _VerifyBenchmarkOutputDirectory(benchmark_setup_path,
+                                    benchmark_output_directory_path):
   """Verifies that all runs inside the run_output_directory worked as expected.
   Args:
@@ -285,15 +289,15 @@ def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
     if not os.path.isfile(trace_path):
       logging.error('missing trace %s' % trace_path)
       continue
-    trace = LoadingTrace.FromJsonFile(trace_path)
+    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
     logging.info('verifying %s from %s' % (trace.url, trace_path))
-    effective_requests = ListUrlRequests(trace, RequestOutcome.All)
-    effective_post_requests = ListUrlRequests(trace, RequestOutcome.Post)
+    effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
+    effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
     effective_cached_requests = \
-        ListUrlRequests(trace, RequestOutcome.ServedFromCache)
+        _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
     effective_uncached_requests = \
-        ListUrlRequests(trace, RequestOutcome.NotServedFromCache)
+        _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
     missing_requests = original_requests.difference(effective_requests)
     unexpected_requests = effective_requests.difference(original_requests)
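# Editorial worked example (not part of the CL) of the two set differences
# above, with original = {a, b} and effective = {b, c}:
#   missing_requests    = {a, b} - {b, c} = {a}  (expected but never seen)
#   unexpected_requests = {b, c} - {a, b} = {c}  (seen but never expected)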
@@ -344,7 +348,7 @@ def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
                          'Distinct resource requests to WPR')
-def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
+def _ReadSubresourceFromRunnerOutputDir(runner_output_dir):
   """Extracts a list of subresources in a runner output directory.
   Args:
@@ -355,7 +359,7 @@ def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
   """
   trace_path = os.path.join(
       runner_output_dir, '0', sandwich_runner.TRACE_FILENAME)
-  trace = LoadingTrace.FromJsonFile(trace_path)
+  trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
   url_set = set()
   for request_event in _FilterOutDataAndIncompleteRequests(
       trace.request_track.GetEvents()):
@@ -365,7 +369,7 @@ def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
   return [url for url in url_set]
-def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
+def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
   """Validates the content of a cache archive.
   Args:
@@ -378,9 +382,9 @@ def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
     cache_keys = set(
         chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
-    trace = LoadingTrace.FromJsonFile(cache_build_trace_path)
-    effective_requests = ListUrlRequests(trace, RequestOutcome.All)
-    effective_post_requests = ListUrlRequests(trace, RequestOutcome.Post)
+    trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
+    effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
+    effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
     upload_data_stream_cache_entry_keys = set()
     upload_data_stream_requests = set()
@@ -400,3 +404,203 @@ def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
                            'POST resources')
     _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
                            'Cached resources')
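# Editorial sketch (not part of the CL): how the validation above separates
# POST upload-data-stream cache keys from plain URL keys (the keys below are
# hypothetical examples):
upload_keys = set()
for key in ('1/https://a.test/form', 'https://a.test/img.png'):
  if _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(key):
    upload_keys.add(key)
assert upload_keys == set(['1/https://a.test/form'])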
+
+
+class PrefetchBenchmarkBuilder(task_manager.Builder):
+  """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""
+
+  def __init__(self, common_builder):
+    task_manager.Builder.__init__(self,
+        common_builder.output_directory,
+        common_builder.output_subdirectory)
+    self._common_builder = common_builder
+
+    self._patched_wpr_task = None
+    self._reference_cache_task = None
+    self._trace_from_grabbing_reference_cache = None
+    self._subresources_for_urls_task = None
+    self._PopulateCommonPipelines()
+
+  def _PopulateCommonPipelines(self):
+    """Creates necessary tasks to produce the initial cache archive.
+
+    Also creates a task for producing a json file with a mapping of URLs to
+    subresources (subresources-for-urls.json).
+
+    Here is the full dependency tree for the returned task:
+    common/patched-cache-validation.log
+      depends on: common/patched-cache.zip
+        depends on: common/original-cache.zip
+          depends on: common/webpages-patched.wpr
+            depends on: common/webpages.wpr
+      depends on: common/subresources-for-urls.json
+        depends on: common/original-cache.zip
+    """
+    @self.RegisterTask('common/webpages-patched.wpr',
+                       dependencies=[self._common_builder.original_wpr_task])
+    def BuildPatchedWpr():
+      common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
+      shutil.copyfile(
+          self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
+      _PatchWpr(BuildPatchedWpr.path)
+
+    @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
+    def BuildOriginalCache():
+      runner = self._common_builder.CreateSandwichRunner()
+      runner.wpr_archive_path = BuildPatchedWpr.path
+      runner.cache_archive_path = BuildOriginalCache.path
+      runner.cache_operation = sandwich_runner.CacheOperation.SAVE
+      runner.output_dir = BuildOriginalCache.run_path
+      runner.Run()
+    BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
+    original_cache_trace_path = os.path.join(
+        BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
+
+    @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
+    def BuildPatchedCache():
+      _PatchCacheArchive(BuildOriginalCache.path,
+                         original_cache_trace_path, BuildPatchedCache.path)
+
+    @self.RegisterTask('common/subresources-for-urls.json',
+                       [BuildOriginalCache])
+    def ListUrlsResources():
+      url_resources = _ReadSubresourceFromRunnerOutputDir(
+          BuildOriginalCache.run_path)
+      with open(ListUrlsResources.path, 'w') as output:
+        json.dump(url_resources, output)
+
+    @self.RegisterTask('common/patched-cache-validation.log',
+                       [BuildPatchedCache])
+    def ValidatePatchedCache():
+      handler = logging.FileHandler(ValidatePatchedCache.path)
+      logging.getLogger().addHandler(handler)
+      try:
+        _ValidateCacheArchiveContent(
+            original_cache_trace_path, BuildPatchedCache.path)
+      finally:
+        logging.getLogger().removeHandler(handler)
+
+    self._patched_wpr_task = BuildPatchedWpr
+    self._trace_from_grabbing_reference_cache = original_cache_trace_path
+    self._reference_cache_task = BuildPatchedCache
+    self._subresources_for_urls_task = ListUrlsResources
+
+    self._common_builder.default_final_tasks.append(ValidatePatchedCache)
+
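# Editorial sketch (not part of the CL): task_manager's implementation is not
# in this diff, but the decorator pattern used above can be read as follows.
# RegisterTask turns the decorated function into a task object exposing
# `path`, so a task can reference its own output (e.g. BuildPatchedWpr.path)
# and later tasks can list it as a dependency. A minimal illustrative
# stand-in, not the real API:
class _Task(object):
  def __init__(self, path, func, dependencies):
    self.path = path
    self.func = func
    self.dependencies = dependencies or []

def RegisterTask(path, dependencies=None, merge=False):
  def decorator(func):
    return _Task(path, func, dependencies)
  return decorator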
+  def PopulateLoadBenchmark(self, subresource_discoverer,
+                            transformer_list_name, transformer_list):
+    """Populates benchmarking tasks from the common setup tasks.
+
+    Args:
+      subresource_discoverer: Name of a subresource discoverer.
+      transformer_list_name: A string describing the transformers; it is used
+        in task names (prefer names without spaces or special characters).
+      transformer_list: An ordered list of functions that each take a
+        SandwichRunner instance as a parameter; they are applied in the given
+        order immediately before SandwichRunner.Run().
+
+    Here is the full dependency tree added for the returned task:
+    <transformer_list_name>/<subresource_discoverer>-metrics.csv
+      depends on: <transformer_list_name>/<subresource_discoverer>-run/
+        depends on: common/<subresource_discoverer>-cache.zip
+          depends on: some tasks saved by _PopulateCommonPipelines()
+          depends on: common/<subresource_discoverer>-setup.json
+            depends on: some tasks saved by _PopulateCommonPipelines()
+    """
+    additional_column_names = [
+        'url',
+        'repeat_id',
+        'subresource_discoverer',
+        'subresource_count',
+        # The number of subresources detected at the SetupBenchmark step.
+        'subresource_count_theoretic',
+        # The number of subresources suggested for caching by the subresource
+        # discoverer.
+        'cached_subresource_count_theoretic',
+        'cached_subresource_count']
+
+    assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
+    assert 'common' not in SUBRESOURCE_DISCOVERERS
+    shared_task_prefix = os.path.join('common', subresource_discoverer)
+    task_prefix = os.path.join(transformer_list_name, subresource_discoverer)
+
+    @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
+                       dependencies=[self._subresources_for_urls_task])
+    def SetupBenchmark():
+      whitelisted_urls = _ExtractDiscoverableUrls(
+          self._trace_from_grabbing_reference_cache, subresource_discoverer)
+
+      url_resources = json.load(open(self._subresources_for_urls_task.path))
+      common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
+      with open(SetupBenchmark.path, 'w') as output:
+        json.dump({
+            'cache_whitelist': [url for url in whitelisted_urls],
+            'subresource_discoverer': subresource_discoverer,
+            'url_resources': url_resources,
+        }, output)
+
+    @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
+                       dependencies=[
+                           SetupBenchmark, self._reference_cache_task])
+    def BuildBenchmarkCacheArchive():
+      setup = json.load(open(SetupBenchmark.path))
+      chrome_cache.ApplyUrlWhitelistToCacheArchive(
+          cache_archive_path=self._reference_cache_task.path,
+          whitelisted_urls=setup['cache_whitelist'],
+          output_cache_archive_path=BuildBenchmarkCacheArchive.path)
+
+    @self.RegisterTask(task_prefix + '-run/',
+                       dependencies=[BuildBenchmarkCacheArchive])
+    def RunBenchmark():
+      runner = self._common_builder.CreateSandwichRunner()
+      for transformer in transformer_list:
+        transformer(runner)
+      runner.wpr_archive_path = self._patched_wpr_task.path
+      runner.wpr_out_log_path = os.path.join(
+          RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
+      runner.cache_archive_path = BuildBenchmarkCacheArchive.path
+      runner.cache_operation = sandwich_runner.CacheOperation.PUSH
+      runner.output_dir = RunBenchmark.path
+      runner.Run()
+
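# Editorial example (not part of the CL): a transformer is any callable that
# mutates the SandwichRunner before Run(). The attribute below is
# hypothetical; only the runner attributes used in this file are known here.
def EmulateSlowNetwork(runner):
  runner.network_condition = 'Regular2G'  # Hypothetical SandwichRunner knob.
# Would be passed as:
#   PopulateLoadBenchmark(discoverer, 'slow-net', [EmulateSlowNetwork])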
+    @self.RegisterTask(task_prefix + '-metrics.csv',
+                       dependencies=[RunBenchmark])
+    def ExtractMetrics():
+      # TODO(gabadie): Performance improvement: load each trace only once and
+      #   use it for validation and extraction of metrics later.
+      _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path)
+
+      benchmark_setup = json.load(open(SetupBenchmark.path))
+      run_metrics_list = []
+      for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
+          RunBenchmark.path):
+        trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
+        logging.info('processing trace: %s', trace_path)
+        trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
+        run_metrics = {
+            'url': trace.url,
+            'repeat_id': repeat_id,
+            'subresource_discoverer': benchmark_setup['subresource_discoverer'],
+            'subresource_count': len(_ListUrlRequests(
+                trace, _RequestOutcome.All)),
+            'subresource_count_theoretic':
+                len(benchmark_setup['url_resources']),
+            'cached_subresource_count': len(_ListUrlRequests(
+                trace, _RequestOutcome.ServedFromCache)),
+            'cached_subresource_count_theoretic':
+                len(benchmark_setup['cache_whitelist']),
+        }
+        run_metrics.update(
+            sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
+                repeat_dir, trace))
+        run_metrics_list.append(run_metrics)
+
+      run_metrics_list.sort(key=lambda e: e['repeat_id'])
+      with open(ExtractMetrics.path, 'w') as csv_file:
+        writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
+            sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
+        writer.writeheader()
+        for trace_metrics in run_metrics_list:
+          writer.writerow(trace_metrics)
+
+    self._common_builder.default_final_tasks.append(ExtractMetrics)
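# Editorial sketch (not part of the CL): end-to-end wiring of this builder.
# `common_builder` and the discoverer name below are hypothetical, and the
# members of SUBRESOURCE_DISCOVERERS are elided from this diff.
#   builder = PrefetchBenchmarkBuilder(common_builder)
#   builder.PopulateLoadBenchmark('parser', 'no-transform',
#                                 transformer_list=[])
#   # Once the task graph runs, per-repeat metrics land in
#   # no-transform/parser-metrics.csv.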