Index: tools/android/loading/sandwich_prefetch.py
diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_prefetch.py
similarity index 58%
rename from tools/android/loading/sandwich_misc.py
rename to tools/android/loading/sandwich_prefetch.py
index 172f4e40b0552eeedd797d36d4f1f72c18a39932..173d0bffbb1d820583bc2f808fe2b453ee634c7f 100644
--- a/tools/android/loading/sandwich_misc.py
+++ b/tools/android/loading/sandwich_prefetch.py
@@ -2,18 +2,22 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+import csv
 import logging
 import json
 import os
 import re
+import shutil
 from urlparse import urlparse
 import chrome_cache
 import common_util
-from loading_trace import LoadingTrace
+import loading_trace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
+import sandwich_metrics
 import sandwich_runner
+import task_manager
 import wpr_backend
@@ -43,7 +47,7 @@ SUBRESOURCE_DISCOVERERS = set([
 _UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')
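# Editorial aside (not part of the CL): what the key regex above matches.
# The validation code below treats cache keys of the form
# '<numeric upload stream id>/<url>' as POST upload-data-stream entries:
#   m = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match('1/https://a.test/post')
#   m.group('url')  # -> 'https://a.test/post'
#   _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match('https://a.test/get')  # -> None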
-def PatchWpr(wpr_archive_path):
+def _PatchWpr(wpr_archive_path):
   """Patches a WPR archive to get all resources into the HTTP cache and avoid
   invalidation and revalidations.
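# Editorial sketch (not part of the CL; wpr_backend's API is not shown in this
# diff). The idea behind _PatchWpr is to rewrite every archived response so the
# HTTP cache keeps it fresh and never revalidates it. On a plain header dict,
# that could look like (max-age value is an assumption):
def _make_response_cacheable(headers):
  headers['cache-control'] = 'public, max-age=31536000'
  headers.pop('etag', None)           # Drop validators so the cache never
  headers.pop('last-modified', None)  # issues conditional revalidations.
  return headers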
@@ -104,8 +108,8 @@ def _FilterOutDataAndIncompleteRequests(requests):
     yield request
-def PatchCacheArchive(cache_archive_path, loading_trace_path,
-                      cache_archive_dest_path):
+def _PatchCacheArchive(cache_archive_path, loading_trace_path,
+                       cache_archive_dest_path):
   """Patch the cache archive.
   Note: This method updates the raw response headers of cache entries to store
@@ -119,7 +123,7 @@ def PatchCacheArchive(cache_archive_path, loading_trace_path,
       archive <cache_archive_path>.
     cache_archive_dest_path: Archive destination's path.
   """
-  trace = LoadingTrace.FromJsonFile(loading_trace_path)
+  trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
     cache_path = os.path.join(tmp_path, 'cache')
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
@@ -154,7 +158,7 @@ def PatchCacheArchive(cache_archive_path, loading_trace_path,
   logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
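# Editorial sketch (not part of the CL) of the unzip-and-inspect pattern used
# by _PatchCacheArchive and _ValidateCacheArchiveContent. Only
# UnzipDirectoryContent, CacheBackend, ListKeys and GetSize appear in this
# diff; any other chrome_cache helper would be an assumption.
def _list_cache_archive_keys(cache_archive_path):
  with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
    cache_path = os.path.join(tmp_path, 'cache')
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
    return set(chrome_cache.CacheBackend(cache_path, 'simple').ListKeys())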
-def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
+def _ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
   """Extracts discoverable resource URLs from a loading trace according to a
   subresource discoverer.
@@ -171,7 +175,7 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
   # Load trace and related infos.
   logging.info('loading %s' % loading_trace_path)
-  trace = LoadingTrace.FromJsonFile(loading_trace_path)
+  trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
   dependencies_lens = RequestDependencyLens(trace)
   first_resource_request = trace.request_track.GetFirstResourceRequest()
@@ -226,16 +230,16 @@ def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
       logging.error('+ ' + url)
-class RequestOutcome:
+class _RequestOutcome:
   All, ServedFromCache, NotServedFromCache, Post = range(4)
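# Editorial note (not part of the CL): the line above is the pre-`enum`
# Python 2 idiom for integer constants; unpacking range(4) yields:
#   _RequestOutcome.All == 0, _RequestOutcome.ServedFromCache == 1,
#   _RequestOutcome.NotServedFromCache == 2, _RequestOutcome.Post == 3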
-def ListUrlRequests(trace, request_kind):
+def _ListUrlRequests(trace, request_kind):
   """Lists requested URLs from a trace.
   Args:
-    trace: (LoadingTrace) loading trace.
-    request_kind: RequestOutcome indicating the subset of requests to output.
+    trace: (loading_trace.LoadingTrace) loading trace.
+    request_kind: _RequestOutcome indicating the subset of requests to output.
   Returns:
     set([str])
@@ -243,22 +247,22 @@ def ListUrlRequests(trace, request_kind):
   urls = set()
   for request_event in _FilterOutDataAndIncompleteRequests(
       trace.request_track.GetEvents()):
-    if (request_kind == RequestOutcome.ServedFromCache and
+    if (request_kind == _RequestOutcome.ServedFromCache and
         request_event.from_disk_cache):
       urls.add(request_event.url)
-    elif (request_kind == RequestOutcome.Post and
+    elif (request_kind == _RequestOutcome.Post and
           request_event.method.upper().strip() == 'POST'):
       urls.add(request_event.url)
-    elif (request_kind == RequestOutcome.NotServedFromCache and
+    elif (request_kind == _RequestOutcome.NotServedFromCache and
           not request_event.from_disk_cache):
       urls.add(request_event.url)
-    elif request_kind == RequestOutcome.All:
+    elif request_kind == _RequestOutcome.All:
       urls.add(request_event.url)
   return urls
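# Editorial example (not part of the CL): a typical consumer of
# _ListUrlRequests computes a per-trace cache hit ratio (trace_path here is
# hypothetical; traces are loaded the same way elsewhere in this file):
#   trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
#   all_urls = _ListUrlRequests(trace, _RequestOutcome.All)
#   cached = _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
#   hit_ratio = float(len(cached)) / len(all_urls) if all_urls else 0.0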
-def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
-                                   benchmark_output_directory_path):
+def _VerifyBenchmarkOutputDirectory(benchmark_setup_path,
+                                    benchmark_output_directory_path):
   """Verifies that all runs inside the run_output_directory worked as expected.
   Args:
@@ -285,15 +289,15 @@ def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
     if not os.path.isfile(trace_path):
       logging.error('missing trace %s' % trace_path)
       continue
-    trace = LoadingTrace.FromJsonFile(trace_path)
+    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
     logging.info('verifying %s from %s' % (trace.url, trace_path))
-    effective_requests = ListUrlRequests(trace, RequestOutcome.All)
-    effective_post_requests = ListUrlRequests(trace, RequestOutcome.Post)
+    effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
+    effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
     effective_cached_requests = \
-        ListUrlRequests(trace, RequestOutcome.ServedFromCache)
+        _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
     effective_uncached_requests = \
-        ListUrlRequests(trace, RequestOutcome.NotServedFromCache)
+        _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
     missing_requests = original_requests.difference(effective_requests)
     unexpected_requests = effective_requests.difference(original_requests)
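# Editorial worked example (not part of the CL) of the two set differences
# above, with original = {a, b} and effective = {b, c}:
#   missing_requests    = {a, b} - {b, c} = {a}  (expected but never seen)
#   unexpected_requests = {b, c} - {a, b} = {c}  (seen but never expected)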
@@ -344,7 +348,7 @@ def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
                          'Distinct resource requests to WPR')
-def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
+def _ReadSubresourceFromRunnerOutputDir(runner_output_dir):
   """Extracts a list of subresources in a runner output directory.
   Args:
@@ -355,7 +359,7 @@ def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
   """
   trace_path = os.path.join(
       runner_output_dir, '0', sandwich_runner.TRACE_FILENAME)
-  trace = LoadingTrace.FromJsonFile(trace_path)
+  trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
   url_set = set()
   for request_event in _FilterOutDataAndIncompleteRequests(
       trace.request_track.GetEvents()):
@@ -365,7 +369,7 @@ def ReadSubresourceFromRunnerOutputDir(runner_output_dir):
   return [url for url in url_set]
-def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
+def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
   """Validates the content of a cache archive.
   Args:
@@ -378,9 +382,9 @@ def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
     chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
     cache_keys = set(
         chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
-    trace = LoadingTrace.FromJsonFile(cache_build_trace_path)
-    effective_requests = ListUrlRequests(trace, RequestOutcome.All)
-    effective_post_requests = ListUrlRequests(trace, RequestOutcome.Post)
+    trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
+    effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
+    effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
     upload_data_stream_cache_entry_keys = set()
     upload_data_stream_requests = set()
@@ -400,3 +404,203 @@ def ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
                            'POST resources')
     _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
                            'Cached resources')
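# Editorial sketch (not part of the CL): how the validation above separates
# POST upload-data-stream cache keys from plain URL keys (the keys below are
# hypothetical examples):
upload_keys = set()
for key in ('1/https://a.test/form', 'https://a.test/img.png'):
  if _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(key):
    upload_keys.add(key)
assert upload_keys == set(['1/https://a.test/form'])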
+
+
+class PrefetchBenchmarkBuilder(task_manager.Builder):
+  """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""
+
+  def __init__(self, common_builder):
+    task_manager.Builder.__init__(self,
+        common_builder.output_directory,
+        common_builder.output_subdirectory)
+    self._common_builder = common_builder
+
+    self._patched_wpr_task = None
+    self._reference_cache_task = None
+    self._trace_from_grabbing_reference_cache = None
+    self._subresources_for_urls_task = None
+    self._PopulateCommonPipelines()
+
+  def _PopulateCommonPipelines(self):
+    """Creates necessary tasks to produce the initial cache archive.
+
+    Also creates a task for producing a json file with a mapping of URLs to
+    subresources (subresources-for-urls.json).
+
+    Here is the full dependency tree for the returned task:
+    common/patched-cache-validation.log
+      depends on: common/patched-cache.zip
+        depends on: common/original-cache.zip
+          depends on: common/webpages-patched.wpr
+            depends on: common/webpages.wpr
+      depends on: common/subresources-for-urls.json
+        depends on: common/original-cache.zip
+    """
+    @self.RegisterTask('common/webpages-patched.wpr',
+                       dependencies=[self._common_builder.original_wpr_task])
+    def BuildPatchedWpr():
+      common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
+      shutil.copyfile(
+          self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
+      _PatchWpr(BuildPatchedWpr.path)
+
+    @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
+    def BuildOriginalCache():
+      runner = self._common_builder.CreateSandwichRunner()
+      runner.wpr_archive_path = BuildPatchedWpr.path
+      runner.cache_archive_path = BuildOriginalCache.path
+      runner.cache_operation = sandwich_runner.CacheOperation.SAVE
+      runner.output_dir = BuildOriginalCache.run_path
+      runner.Run()
+    BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
+    original_cache_trace_path = os.path.join(
+        BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
+
+    @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
+    def BuildPatchedCache():
+      _PatchCacheArchive(BuildOriginalCache.path,
+                         original_cache_trace_path, BuildPatchedCache.path)
+
+    @self.RegisterTask('common/subresources-for-urls.json',
+                       [BuildOriginalCache])
+    def ListUrlsResources():
+      url_resources = _ReadSubresourceFromRunnerOutputDir(
+          BuildOriginalCache.run_path)
+      with open(ListUrlsResources.path, 'w') as output:
+        json.dump(url_resources, output)
+
+    @self.RegisterTask('common/patched-cache-validation.log',
+                       [BuildPatchedCache])
+    def ValidatePatchedCache():
+      handler = logging.FileHandler(ValidatePatchedCache.path)
+      logging.getLogger().addHandler(handler)
+      try:
+        _ValidateCacheArchiveContent(
+            original_cache_trace_path, BuildPatchedCache.path)
+      finally:
+        logging.getLogger().removeHandler(handler)
+
+    self._patched_wpr_task = BuildPatchedWpr
+    self._trace_from_grabbing_reference_cache = original_cache_trace_path
+    self._reference_cache_task = BuildPatchedCache
+    self._subresources_for_urls_task = ListUrlsResources
+
+    self._common_builder.default_final_tasks.append(ValidatePatchedCache)
+
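# Editorial sketch (not part of the CL): task_manager's implementation is not
# in this diff, but the decorator pattern used above can be read as follows.
# RegisterTask turns the decorated function into a task object exposing
# `path`, so a task can reference its own output (e.g. BuildPatchedWpr.path)
# and later tasks can list it as a dependency. A minimal illustrative
# stand-in, not the real API:
class _Task(object):
  def __init__(self, path, func, dependencies):
    self.path = path
    self.func = func
    self.dependencies = dependencies or []

def RegisterTask(path, dependencies=None, merge=False):
  def decorator(func):
    return _Task(path, func, dependencies)
  return decorator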
+  def PopulateLoadBenchmark(self, subresource_discoverer,
+                            transformer_list_name, transformer_list):
+    """Populates benchmarking tasks from the common setup tasks.
+
+    Args:
+      subresource_discoverer: Name of a subresource discoverer.
+      transformer_list_name: A string describing the transformers; it is used
+        in task names (prefer names without spaces or special characters).
+      transformer_list: An ordered list of functions that each take a
+        SandwichRunner instance as a parameter; they are applied in the given
+        order immediately before SandwichRunner.Run().
+
+    Here is the full dependency tree added for the returned task:
+    <transformer_list_name>/<subresource_discoverer>-metrics.csv
+      depends on: <transformer_list_name>/<subresource_discoverer>-run/
+        depends on: common/<subresource_discoverer>-cache.zip
+          depends on: some tasks saved by _PopulateCommonPipelines()
+          depends on: common/<subresource_discoverer>-setup.json
+            depends on: some tasks saved by _PopulateCommonPipelines()
+    """
+    additional_column_names = [
+        'url',
+        'repeat_id',
+        'subresource_discoverer',
+        'subresource_count',
+        # The number of subresources detected at the SetupBenchmark step.
+        'subresource_count_theoretic',
+        # The number of subresources suggested for caching by the subresource
+        # discoverer.
+        'cached_subresource_count_theoretic',
+        'cached_subresource_count']
+
+    assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
+    assert 'common' not in SUBRESOURCE_DISCOVERERS
+    shared_task_prefix = os.path.join('common', subresource_discoverer)
+    task_prefix = os.path.join(transformer_list_name, subresource_discoverer)
+
+    @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
+                       dependencies=[self._subresources_for_urls_task])
+    def SetupBenchmark():
+      whitelisted_urls = _ExtractDiscoverableUrls(
+          self._trace_from_grabbing_reference_cache, subresource_discoverer)
+
+      url_resources = json.load(open(self._subresources_for_urls_task.path))
+      common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
+      with open(SetupBenchmark.path, 'w') as output:
+        json.dump({
+            'cache_whitelist': [url for url in whitelisted_urls],
+            'subresource_discoverer': subresource_discoverer,
+            'url_resources': url_resources,
+        }, output)
+
+    @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
+                       dependencies=[
+                           SetupBenchmark, self._reference_cache_task])
+    def BuildBenchmarkCacheArchive():
+      setup = json.load(open(SetupBenchmark.path))
+      chrome_cache.ApplyUrlWhitelistToCacheArchive(
+          cache_archive_path=self._reference_cache_task.path,
+          whitelisted_urls=setup['cache_whitelist'],
+          output_cache_archive_path=BuildBenchmarkCacheArchive.path)
+
+    @self.RegisterTask(task_prefix + '-run/',
+                       dependencies=[BuildBenchmarkCacheArchive])
+    def RunBenchmark():
+      runner = self._common_builder.CreateSandwichRunner()
+      for transformer in transformer_list:
+        transformer(runner)
+      runner.wpr_archive_path = self._patched_wpr_task.path
+      runner.wpr_out_log_path = os.path.join(
+          RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
+      runner.cache_archive_path = BuildBenchmarkCacheArchive.path
+      runner.cache_operation = sandwich_runner.CacheOperation.PUSH
+      runner.output_dir = RunBenchmark.path
+      runner.Run()
+
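# Editorial example (not part of the CL): a transformer is any callable that
# mutates the SandwichRunner before Run(). The attribute below is
# hypothetical; only the runner attributes used in this file are known here.
def EmulateSlowNetwork(runner):
  runner.network_condition = 'Regular2G'  # Hypothetical SandwichRunner knob.
# Would be passed as:
#   PopulateLoadBenchmark(discoverer, 'slow-net', [EmulateSlowNetwork])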
+    @self.RegisterTask(task_prefix + '-metrics.csv',
+                       dependencies=[RunBenchmark])
+    def ExtractMetrics():
+      # TODO(gabadie): Performance improvement: load each trace only once and
+      #   use it for validation and extraction of metrics later.
+      _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path)
+
+      benchmark_setup = json.load(open(SetupBenchmark.path))
+      run_metrics_list = []
+      for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
+          RunBenchmark.path):
+        trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
+        logging.info('processing trace: %s', trace_path)
+        trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
+        run_metrics = {
+            'url': trace.url,
+            'repeat_id': repeat_id,
+            'subresource_discoverer': benchmark_setup['subresource_discoverer'],
+            'subresource_count': len(_ListUrlRequests(
+                trace, _RequestOutcome.All)),
+            'subresource_count_theoretic':
+                len(benchmark_setup['url_resources']),
+            'cached_subresource_count': len(_ListUrlRequests(
+                trace, _RequestOutcome.ServedFromCache)),
+            'cached_subresource_count_theoretic':
+                len(benchmark_setup['cache_whitelist']),
+        }
+        run_metrics.update(
+            sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
+                repeat_dir, trace))
+        run_metrics_list.append(run_metrics)
+
+      run_metrics_list.sort(key=lambda e: e['repeat_id'])
+      with open(ExtractMetrics.path, 'w') as csv_file:
+        writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
+            sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
+        writer.writeheader()
+        for trace_metrics in run_metrics_list:
+          writer.writerow(trace_metrics)
+
+    self._common_builder.default_final_tasks.append(ExtractMetrics)
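# Editorial sketch (not part of the CL): end-to-end wiring of this builder.
# `common_builder` and the discoverer name below are hypothetical, and the
# members of SUBRESOURCE_DISCOVERERS are elided from this diff.
#   builder = PrefetchBenchmarkBuilder(common_builder)
#   builder.PopulateLoadBenchmark('parser', 'no-transform',
#                                 transformer_list=[])
#   # Once the task graph runs, per-repeat metrics land in
#   # no-transform/parser-metrics.csv.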