Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_prefetch.py

Issue 2023263002: sandwich: Move all NoState-Prefetch related code in sandwich_prefetch.py (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 7 months ago
Index: tools/android/loading/sandwich_prefetch.py
diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_prefetch.py
similarity index 58%
rename from tools/android/loading/sandwich_misc.py
rename to tools/android/loading/sandwich_prefetch.py
index 172f4e40b0552eeedd797d36d4f1f72c18a39932..173d0bffbb1d820583bc2f808fe2b453ee634c7f 100644
--- a/tools/android/loading/sandwich_misc.py
+++ b/tools/android/loading/sandwich_prefetch.py
@@ -2,18 +2,22 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+import csv
import logging
import json
import os
import re
+import shutil
from urlparse import urlparse
import chrome_cache
import common_util
-from loading_trace import LoadingTrace
+import loading_trace
from prefetch_view import PrefetchSimulationView
from request_dependencies_lens import RequestDependencyLens
+import sandwich_metrics
import sandwich_runner
+import task_manager
import wpr_backend
@@ -43,7 +47,7 @@ SUBRESOURCE_DISCOVERERS = set([
_UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')
-def PatchWpr(wpr_archive_path):
+def _PatchWpr(wpr_archive_path):
"""Patches a WPR archive to get all resources into the HTTP cache and avoid
invalidations and revalidations.
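
Note for reviewers: the patching step amounts to rewriting the archived response headers so every entry is served as aggressively cacheable. A minimal sketch of the idea, assuming a hypothetical archive object with ListUrlEntries/SetResponseHeader/DeleteResponseHeader accessors (illustration only, not the actual wpr_backend API):

    # Sketch: make every archived response cacheable for a very long time so
    # the HTTP cache fills up and never revalidates during a benchmark run.
    ONE_YEAR_IN_SECONDS = 365 * 24 * 3600

    def patch_archive(archive):  # 'archive' is a hypothetical backend object
        for entry in archive.ListUrlEntries():
            entry.SetResponseHeader('cache-control',
                                    'max-age=%d' % ONE_YEAR_IN_SECONDS)
            # Drop validators so the browser has no reason to send conditional
            # requests (If-None-Match / If-Modified-Since).
            entry.DeleteResponseHeader('etag')
            entry.DeleteResponseHeader('last-modified')
        archive.Persist()  # hypothetical save-back
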
@@ -104,8 +108,8 @@ def _FilterOutDataAndIncompleteRequests(requests):
yield request
-def PatchCacheArchive(cache_archive_path, loading_trace_path,
- cache_archive_dest_path):
+def _PatchCacheArchive(cache_archive_path, loading_trace_path,
+ cache_archive_dest_path):
"""Patch the cache archive.
Note: This method update the raw response headers of cache entries' to store
@@ -119,7 +123,7 @@ def PatchCacheArchive(cache_archive_path, loading_trace_path,
archive <cache_archive_path>.
cache_archive_dest_path: Archive destination's path.
"""
- trace = LoadingTrace.FromJsonFile(loading_trace_path)
+ trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
cache_path = os.path.join(tmp_path, 'cache')
chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
@@ -154,7 +158,7 @@ def PatchCacheArchive(cache_archive_path, loading_trace_path,
logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
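
Note for reviewers: the unzip-patch-rezip shape of this function is generic. A self-contained sketch of the same pattern using only the standard library (the chrome_cache and common_util helpers used by the patch are not shown here):

    import os
    import shutil
    import tempfile
    import zipfile

    def patch_zip_archive(src_zip, dest_zip, patch_directory_fn):
        # Work in a throwaway directory, mirroring common_util.TemporaryDirectory.
        tmp_path = tempfile.mkdtemp(prefix='sandwich_tmp')
        try:
            with zipfile.ZipFile(src_zip) as archive:
                archive.extractall(tmp_path)  # UnzipDirectoryContent equivalent
            patch_directory_fn(tmp_path)      # mutate the unpacked cache in place
            with zipfile.ZipFile(dest_zip, 'w') as archive:
                for root, _, files in os.walk(tmp_path):
                    for name in files:
                        full_path = os.path.join(root, name)
                        archive.write(full_path,
                                      os.path.relpath(full_path, tmp_path))
        finally:
            shutil.rmtree(tmp_path)
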
-def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
+def _ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
"""Extracts discoverable resource urls from a loading trace according to a
sub-resource discoverer.
@@ -171,7 +175,7 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
# Load trace and related infos.
logging.info('loading %s' % loading_trace_path)
- trace = LoadingTrace.FromJsonFile(loading_trace_path)
+ trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
dependencies_lens = RequestDependencyLens(trace)
first_resource_request = trace.request_track.GetFirstResourceRequest()
@@ -226,16 +230,16 @@ def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
logging.error('+ ' + url)
-class RequestOutcome:
+class _RequestOutcome:
All, ServedFromCache, NotServedFromCache, Post = range(4)
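
(The tuple-unpacking from range() is the usual pre-enum idiom for integer constants in this Python 2 codebase; it is exactly equivalent to:

    All, ServedFromCache, NotServedFromCache, Post = 0, 1, 2, 3

Python 3 code would typically use enum.IntEnum instead.)
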
-def ListUrlRequests(trace, request_kind):
+def _ListUrlRequests(trace, request_kind):
"""Lists requested URLs from a trace.
Args:
- trace: (LoadingTrace) loading trace.
- request_kind: RequestOutcome indicating the subset of requests to output.
+ trace: (loading_trace.LoadingTrace) loading trace.
+ request_kind: _RequestOutcome indicating the subset of requests to output.
Returns:
set([str])
@@ -243,22 +247,22 @@ def ListUrlRequests(trace, request_kind):
urls = set()
for request_event in _FilterOutDataAndIncompleteRequests(
trace.request_track.GetEvents()):
- if (request_kind == RequestOutcome.ServedFromCache and
+ if (request_kind == _RequestOutcome.ServedFromCache and
request_event.from_disk_cache):
urls.add(request_event.url)
- elif (request_kind == RequestOutcome.Post and
+ elif (request_kind == _RequestOutcome.Post and
request_event.method.upper().strip() == 'POST'):
urls.add(request_event.url)
- elif (request_kind == RequestOutcome.NotServedFromCache and
+ elif (request_kind == _RequestOutcome.NotServedFromCache and
not request_event.from_disk_cache):
urls.add(request_event.url)
- elif request_kind == RequestOutcome.All:
+ elif request_kind == _RequestOutcome.All:
urls.add(request_event.url)
return urls
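
Note for reviewers: to illustrate how these request buckets combine, here is a usage sketch (the helpers are now module-private, so this is illustration only; trace_path is an assumed variable):

    # Sketch: per-URL cache hit ratio for a single trace.
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
    all_urls = _ListUrlRequests(trace, _RequestOutcome.All)
    cached_urls = _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
    hit_ratio = float(len(cached_urls)) / max(len(all_urls), 1)
    logging.info('cache hit ratio: %.1f%%', 100 * hit_ratio)
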
-def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
- benchmark_output_directory_path):
+def _VerifyBenchmarkOutputDirectory(benchmark_setup_path,
+ benchmark_output_directory_path):
"""Verifies that all run inside the run_output_directory worked as expected.
Args:
@@ -285,15 +289,15 @@ def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
if not os.path.isfile(trace_path):
logging.error('missing trace %s' % trace_path)
continue
- trace = LoadingTrace.FromJsonFile(trace_path)
+ trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
logging.info('verifying %s from %s' % (trace.url, trace_path))
- effective_requests = ListUrlRequests(trace, RequestOutcome.All)
- effective_post_requests = ListUrlRequests(trace, RequestOutcome.Post)
+ effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
+ effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
effective_cached_requests = \
- ListUrlRequests(trace, RequestOutcome.ServedFromCache)
+ _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
effective_uncached_requests = \
- ListUrlRequests(trace, RequestOutcome.NotServedFromCache)
+ _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
missing_requests = original_requests.difference(effective_requests)
unexpected_requests = effective_requests.difference(original_requests)
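
Note for reviewers: the verification reduces to plain set algebra. A sketch consistent with the _PrintUrlSetComparison signature and the '+ '/'- ' log lines visible in the surrounding hunks (the real body is not shown in this diff):

    def compare_url_sets(ref_url_set, url_set, url_set_name):
        missing = ref_url_set - url_set      # expected, but never requested
        unexpected = url_set - ref_url_set   # requested, but never expected
        if not missing and not unexpected:
            logging.info('%s: no differences', url_set_name)
            return
        for url in sorted(missing):
            logging.error('- ' + url)
        for url in sorted(unexpected):
            logging.error('+ ' + url)
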
@@ -344,7 +348,7 @@ def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
'Distinct resource requests to WPR')
-def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
+def _ReadSubresourceFromRunnerOutputDir(runner_output_dir):
"""Extracts a list of subresources in runner output directory.
Args:
@@ -355,7 +359,7 @@ def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
"""
trace_path = os.path.join(
runner_output_dir, '0', sandwich_runner.TRACE_FILENAME)
- trace = LoadingTrace.FromJsonFile(trace_path)
+ trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
url_set = set()
for request_event in _FilterOutDataAndIncompleteRequests(
trace.request_track.GetEvents()):
@@ -365,7 +369,7 @@ def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
return [url for url in url_set]
-def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
+def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
"""Validates a cache archive content.
Args:
@@ -378,9 +382,9 @@ def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
cache_keys = set(
chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
- trace = LoadingTrace.FromJsonFile(cache_build_trace_path)
- effective_requests = ListUrlRequests(trace, RequestOutcome.All)
- effective_post_requests = ListUrlRequests(trace, RequestOutcome.Post)
+ trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
+ effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
+ effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
upload_data_stream_cache_entry_keys = set()
upload_data_stream_requests = set()
@@ -400,3 +404,203 @@ def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
'POST resources')
_PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
'Cached resources')
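
Note for reviewers: the _UPLOAD_DATA_STREAM_REQUESTS_REGEX defined near the top of the file is what lets this validation tell POST upload-stream cache entries apart from ordinary URL keys. A quick self-contained demonstration (example URLs are made up):

    import re

    _UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')

    # Upload-stream keys carry a numeric stream-id prefix before the URL:
    match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match('1/https://example.com/submit')
    assert match.group('url') == 'https://example.com/submit'

    # Ordinary cache keys have no such prefix and therefore do not match:
    assert _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match('https://example.com/') is None
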
+
+
+class PrefetchBenchmarkBuilder(task_manager.Builder):
+ """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""
+
+ def __init__(self, common_builder):
+ task_manager.Builder.__init__(self,
+ common_builder.output_directory,
+ common_builder.output_subdirectory)
+ self._common_builder = common_builder
+
+ self._patched_wpr_task = None
+ self._reference_cache_task = None
+ self._trace_from_grabbing_reference_cache = None
+ self._subresources_for_urls_task = None
+ self._PopulateCommonPipelines()
+
+ def _PopulateCommonPipelines(self):
+ """Creates necessary tasks to produce initial cache archive.
+
+    Also creates a task that produces a JSON file mapping URLs to their
+    subresources (subresources-for-urls.json).
+
+ Here is the full dependency tree for the returned task:
+ common/patched-cache-validation.log
+ depends on: common/patched-cache.zip
+ depends on: common/original-cache.zip
+ depends on: common/webpages-patched.wpr
+ depends on: common/webpages.wpr
+          depends on: common/subresources-for-urls.json
+ depends on: common/original-cache.zip
+ """
+ @self.RegisterTask('common/webpages-patched.wpr',
+ dependencies=[self._common_builder.original_wpr_task])
+ def BuildPatchedWpr():
+ common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
+ shutil.copyfile(
+ self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
+ _PatchWpr(BuildPatchedWpr.path)
+
+ @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
+ def BuildOriginalCache():
+ runner = self._common_builder.CreateSandwichRunner()
+ runner.wpr_archive_path = BuildPatchedWpr.path
+ runner.cache_archive_path = BuildOriginalCache.path
+ runner.cache_operation = sandwich_runner.CacheOperation.SAVE
+ runner.output_dir = BuildOriginalCache.run_path
+ runner.Run()
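+    # Note: run_path is an attribute attached to the task function after its
+    # definition; the body above only reads it when the task executes, so this
+    # ordering is safe.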
+ BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
+ original_cache_trace_path = os.path.join(
+ BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
+
+ @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
+ def BuildPatchedCache():
+ _PatchCacheArchive(BuildOriginalCache.path,
+ original_cache_trace_path, BuildPatchedCache.path)
+
+ @self.RegisterTask('common/subresources-for-urls.json',
+ [BuildOriginalCache])
+ def ListUrlsResources():
+ url_resources = _ReadSubresourceFromRunnerOutputDir(
+ BuildOriginalCache.run_path)
+ with open(ListUrlsResources.path, 'w') as output:
+ json.dump(url_resources, output)
+
+ @self.RegisterTask('common/patched-cache-validation.log',
+ [BuildPatchedCache])
+ def ValidatePatchedCache():
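+      # Tee validation logging into the task's output file; the finally block
+      # guarantees the handler is removed even if validation raises.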
+ handler = logging.FileHandler(ValidatePatchedCache.path)
+ logging.getLogger().addHandler(handler)
+ try:
+ _ValidateCacheArchiveContent(
+ original_cache_trace_path, BuildPatchedCache.path)
+ finally:
+ logging.getLogger().removeHandler(handler)
+
+ self._patched_wpr_task = BuildPatchedWpr
+ self._trace_from_grabbing_reference_cache = original_cache_trace_path
+ self._reference_cache_task = BuildPatchedCache
+ self._subresources_for_urls_task = ListUrlsResources
+
+ self._common_builder.default_final_tasks.append(ValidatePatchedCache)
+
+ def PopulateLoadBenchmark(self, subresource_discoverer,
+ transformer_list_name, transformer_list):
+ """Populate benchmarking tasks from its setup tasks.
+
+ Args:
+      subresource_discoverer: Name of a subresource discoverer.
+      transformer_list_name: A string describing the transformers; it is used
+          in task names (prefer names without spaces or special characters).
+      transformer_list: An ordered list of functions that each take a
+          SandwichRunner instance as a parameter; they are applied in the given
+          order immediately before SandwichRunner.Run().
+
+    Here is the full dependency tree added for the returned task:
+ <transformer_list_name>/<subresource_discoverer>-metrics.csv
+ depends on: <transformer_list_name>/<subresource_discoverer>-run/
+ depends on: common/<subresource_discoverer>-cache.zip
+ depends on: some tasks saved by PopulateCommonPipelines()
+ depends on: common/<subresource_discoverer>-setup.json
+ depends on: some tasks saved by PopulateCommonPipelines()
+ """
+ additional_column_names = [
+ 'url',
+ 'repeat_id',
+ 'subresource_discoverer',
+ 'subresource_count',
+        # The number of subresources detected at the SetupBenchmark step.
+ 'subresource_count_theoretic',
+        # The number of subresources to cache, as suggested by the subresource
+        # discoverer.
+ 'cached_subresource_count_theoretic',
+ 'cached_subresource_count']
+
+ assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
+ assert 'common' not in SUBRESOURCE_DISCOVERERS
+ shared_task_prefix = os.path.join('common', subresource_discoverer)
+ task_prefix = os.path.join(transformer_list_name, subresource_discoverer)
+
+ @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
+ dependencies=[self._subresources_for_urls_task])
+ def SetupBenchmark():
+ whitelisted_urls = _ExtractDiscoverableUrls(
+ self._trace_from_grabbing_reference_cache, subresource_discoverer)
+
+ url_resources = json.load(open(self._subresources_for_urls_task.path))
+ common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
+ with open(SetupBenchmark.path, 'w') as output:
+ json.dump({
+ 'cache_whitelist': [url for url in whitelisted_urls],
+ 'subresource_discoverer': subresource_discoverer,
+ 'url_resources': url_resources,
+ }, output)
+
+ @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
+ dependencies=[
+ SetupBenchmark, self._reference_cache_task])
+ def BuildBenchmarkCacheArchive():
+ setup = json.load(open(SetupBenchmark.path))
+ chrome_cache.ApplyUrlWhitelistToCacheArchive(
+ cache_archive_path=self._reference_cache_task.path,
+ whitelisted_urls=setup['cache_whitelist'],
+ output_cache_archive_path=BuildBenchmarkCacheArchive.path)
+
+ @self.RegisterTask(task_prefix + '-run/',
+ dependencies=[BuildBenchmarkCacheArchive])
+ def RunBenchmark():
+ runner = self._common_builder.CreateSandwichRunner()
+ for transformer in transformer_list:
+ transformer(runner)
+ runner.wpr_archive_path = self._patched_wpr_task.path
+ runner.wpr_out_log_path = os.path.join(
+ RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
+ runner.cache_archive_path = BuildBenchmarkCacheArchive.path
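+      # PUSH installs the prepared cache archive before the run, in contrast
+      # to the SAVE operation used by BuildOriginalCache to record it.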
+ runner.cache_operation = sandwich_runner.CacheOperation.PUSH
+ runner.output_dir = RunBenchmark.path
+ runner.Run()
+
+ @self.RegisterTask(task_prefix + '-metrics.csv',
+ dependencies=[RunBenchmark])
+ def ExtractMetrics():
+ # TODO(gabadie): Performance improvement: load each trace only once and
+ # use it for validation and extraction of metrics later.
+ _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path)
+
+ benchmark_setup = json.load(open(SetupBenchmark.path))
+ run_metrics_list = []
+ for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
+ RunBenchmark.path):
+ trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
+ logging.info('processing trace: %s', trace_path)
+ trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
+ run_metrics = {
+ 'url': trace.url,
+ 'repeat_id': repeat_id,
+ 'subresource_discoverer': benchmark_setup['subresource_discoverer'],
+ 'subresource_count': len(_ListUrlRequests(
+ trace, _RequestOutcome.All)),
+ 'subresource_count_theoretic':
+ len(benchmark_setup['url_resources']),
+ 'cached_subresource_count': len(_ListUrlRequests(
+ trace, _RequestOutcome.ServedFromCache)),
+ 'cached_subresource_count_theoretic':
+ len(benchmark_setup['cache_whitelist']),
+ }
+ run_metrics.update(
+ sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
+ repeat_dir, trace))
+ run_metrics_list.append(run_metrics)
+
+ run_metrics_list.sort(key=lambda e: e['repeat_id'])
+ with open(ExtractMetrics.path, 'w') as csv_file:
+ writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
+ sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
+ writer.writeheader()
+ for trace_metrics in run_metrics_list:
+ writer.writerow(trace_metrics)
+
+ self._common_builder.default_final_tasks.append(ExtractMetrics)
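
Note for reviewers: here is a minimal, self-contained sketch of the decorator-based registration pattern PrefetchBenchmarkBuilder relies on. Everything below is simplified illustration under stated assumptions; the real task_manager.Builder also resolves the dependency graph, supports merged tasks, and skips up-to-date outputs:

    import os

    class Builder(object):
        """Toy stand-in for task_manager.Builder (illustration only)."""

        def __init__(self, output_directory):
            self._output_directory = output_directory
            self._tasks = []

        def RegisterTask(self, name, dependencies=None):
            def decorator(func):
                # The decorated function doubles as the task handle: its output
                # path is attached as an attribute, just like the .path
                # accesses in the patch above.
                func.path = os.path.join(self._output_directory, name)
                func.dependencies = dependencies or []
                self._tasks.append(func)
                return func
            return decorator

        def RunAll(self):
            # Naive in-registration-order execution; a real task manager would
            # topologically sort by dependencies and skip fresh outputs.
            for task in self._tasks:
                parent_dir = os.path.dirname(task.path)
                if not os.path.isdir(parent_dir):
                    os.makedirs(parent_dir)  # EnsureParentDirectoryExists role
                task()

    builder = Builder('/tmp/sandwich-demo')

    @builder.RegisterTask('common/hello.txt')
    def WriteHello():
        with open(WriteHello.path, 'w') as output:
            output.write('hello')

    @builder.RegisterTask('common/shout.txt', dependencies=[WriteHello])
    def Shout():
        with open(WriteHello.path) as source:
            data = source.read()
        with open(Shout.path, 'w') as output:
            output.write(data.upper())

    builder.RunAll()
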