Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Unified Diff: tools/perf/metrics/chrome_proxy.py

Issue 191383003: First cut of chrome-proxy (data reduction proxy) measurements. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: . Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/perf/metrics/chrome_proxy.py
diff --git a/tools/perf/metrics/chrome_proxy.py b/tools/perf/metrics/chrome_proxy.py
new file mode 100644
index 0000000000000000000000000000000000000000..365e3465bacdfb3ed68bd049048ee91949bd0ed4
--- /dev/null
+++ b/tools/perf/metrics/chrome_proxy.py
@@ -0,0 +1,323 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
tonyg 2014/03/12 02:08:37 2014
bolian 2014/03/19 00:20:03 Done.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+import base64
+import logging
tonyg 2014/03/12 02:08:37 nit: alphabetize
bolian 2014/03/19 00:20:03 Done.
+import gzip
+import hashlib
+
+from io import BytesIO
+from metrics import Metric
+from metrics import loading
+from telemetry.page import page_measurement
+# All chrome_proxy metrics are Chrome only.
bengr 2014/03/12 18:17:36 Is there a check somewhere that Chrome is being te
bolian 2014/03/19 00:20:03 No. But an exception will be thrown if a non-Chrom
+from telemetry.core.backends.chrome import inspector_network
+from telemetry.core.timeline import recording_options
+
+
class ChromeProxyMetricException(page_measurement.MeasurementFailure):
  """Measurement failure raised when a Chrome proxy check does not hold."""
+
+
class ChromeProxyLatency(Metric):
  """The metrics for page loading latency based on window.performance.

  Reports the standard loading metrics plus navigation-timing segments
  named after the corresponding histograms in
  chrome/renderer/page_load_histograms.cc.
  """

  # (result name, end event, start event) triples; each result is
  # (end - start) in milliseconds, taken from window.performance.timing.
  _NAV_TIMING_SEGMENTS = (
      ('nt_delay_before_fetch', 'fetchStart', 'navigationStart'),
      ('nt_delay_before_request', 'requestStart', 'navigationStart'),
      ('nt_domain_lookup', 'domainLookupEnd', 'domainLookupStart'),
      ('nt_connect', 'connectEnd', 'connectStart'),
      ('nt_request', 'responseStart', 'requestStart'),
      ('nt_response', 'responseEnd', 'responseStart'),
  )

  def __init__(self):
    super(ChromeProxyLatency, self).__init__()

  def Start(self, page, tab):
    raise NotImplementedError()

  def Stop(self, page, tab):
    raise NotImplementedError()

  def AddResults(self, tab, results):
    # Standard loading metrics first, then the navigation-timing segments.
    loading.LoadingMetric().AddResults(tab, results)

    timings = tab.EvaluateJavaScript('window.performance.timing')
    for name, end_event, start_event in self._NAV_TIMING_SEGMENTS:
      delta_ms = float(timings[end_event]) - timings[start_event]
      results.Add(name, 'ms', delta_ms)
+
+
# Expected Via header values identifying the Chrome compression proxy.
# The old format is a complete Via entry; the new format is matched after
# a 4-character version prefix (e.g. "1.1 ") in each Via entry.
_CHROME_PROXY_VIA_HEADER = 'Chrome-Compression-Proxy'
_CHROME_PROXY_VIA_HEADER_OLD = '1.1 Chrome Compression Proxy'
+
+
class ChromeProxyResponse(object):
  """Represents an HTTP response from a timeline event.

  Wraps InspectorNetworkResponseData and adds Chrome-proxy-specific
  helpers: content-length estimation, data-saving computation, and
  Via-header validation.
  """

  def __init__(self, event):
    self._response = (
        inspector_network.InspectorNetworkResponseData.FromTimelineEvent(event))
    # Lazily computed by the content_length property.
    self._content_length = None

  @property
  def response(self):
    return self._response

  @property
  def url_signature(self):
    """A short, stable identifier for the response URL (md5 hex digest)."""
    return hashlib.md5(self.response.url).hexdigest()

  @property
  def content_length(self):
    if self._content_length is None:
      self._content_length = self.GetContentLength(self.response)
    return self._content_length

  @property
  def has_original_content_length(self):
    return 'X-Original-Content-Length' in self.response.headers

  @property
  def original_content_length(self):
    if self.has_original_content_length:
      return int(self.response.GetHeader('X-Original-Content-Length'))
    return 0

  @property
  def data_saving_rate(self):
    """Fraction of bytes saved relative to the original content length.

    Returns 0.0 when X-Original-Content-Length is absent or non-positive.
    """
    if (not self.has_original_content_length or
        self.original_content_length <= 0):
      return 0.0
    return (float(self.original_content_length - self.content_length) /
            self.original_content_length)

  @staticmethod
  def GetGzippedBodyLength(body):
    """Returns the length of |body| after gzip compression (0 if empty).

    Raises whatever gzip raises, after logging a warning.
    """
    if not body:
      return 0
    bio = BytesIO()
    try:
      with gzip.GzipFile(fileobj=bio, mode="wb") as f:
        f.write(body.encode('utf-8'))
    except Exception as e:
      logging.warning('Fail to gzip response body: %s', e)
      raise
    return len(bio.getvalue())

  # Backward-compatible alias for the original (misspelled) method name.
  GetGizppedBodyLength = GetGzippedBodyLength

  @staticmethod
  def ShouldGzipContent(content_type):
    """Returns True if we need to gzip the content.

    Text and javascript/json application types are treated as gzippable.
    """
    if not content_type:
      return False
    if 'text/' in content_type:
      return True
    if ('application/' in content_type and
        ('javascript' in content_type or 'json' in content_type)):
      return True
    return False

  @staticmethod
  def GetContentLengthFromBody(body, base64_encoded, content_type):
    """Estimates the transfer size of |body| in bytes."""
    if not body:
      return 0
    if base64_encoded:
      # Binary bodies arrive base64-encoded; the decoded size is the
      # actual content size.
      return len(base64.b64decode(body))
    # Use gzipped content length if we can gzip the body based on
    # Content-Type and the gzipped length is less than the body length.
    if ChromeProxyResponse.ShouldGzipContent(content_type):
      gzipped = ChromeProxyResponse.GetGzippedBodyLength(body)
      return min(gzipped, len(body))
    return len(body)

  @staticmethod
  def GetContentLength(resp):
    """Best-effort content length: body-based, with header fallback."""
    cl = 0
    body, base64_encoded = resp.GetBody()
    try:
      cl = ChromeProxyResponse.GetContentLengthFromBody(
          body, base64_encoded, resp.GetHeader('Content-Type'))
    except Exception as e:
      # Deliberate best-effort: fall back to the Content-Length header,
      # then to the raw body length.
      logging.warning('Fail to get content length for %s from body: %s',
                      resp.url[:100], e)
      cl_header = resp.GetHeader('Content-Length')
      if cl_header:
        cl = int(cl_header)
      elif body:
        cl = len(body)
    return cl

  @staticmethod
  def ShouldHaveChromeProxyViaHeader(resp):
    # Ignore https and data url.
    if resp.url.startswith('https') or resp.url.startswith('data:'):
      return False
    # Ignore 304 Not Modified.
    if resp.status == 304:
      return False
    return True

  @staticmethod
  def HasChromeProxyViaHeader(resp):
    via_header = resp.GetHeader('Via')
    if not via_header:
      return False
    vias = [v.strip(' ') for v in via_header.split(',')]
    # The Via header is valid if it is the old format or the new format
    # with 4-character version prefix, for example,
    # "1.1 Chrome-Compression-Proxy".
    return (_CHROME_PROXY_VIA_HEADER_OLD in vias or
            any(v[4:] == _CHROME_PROXY_VIA_HEADER for v in vias))

  def IsValidByViaHeader(self):
    """True when the Via header is consistent with proxy expectations."""
    return (not self.ShouldHaveChromeProxyViaHeader(self.response) or
            self.HasChromeProxyViaHeader(self.response))

  def IsSafebrowsingResponse(self):
    """True for the proxy's malware-warning redirect response."""
    if (self.response.status == 307 and
        self.response.GetHeader('X-Malware-Url') == '1' and
        self.IsValidByViaHeader() and
        self.response.GetHeader('Location') == self.response.url):
      return True
    return False
+
+
class ChromeProxyTimelineMetrics(Metric):
  """A Chrome proxy timeline metric.

  Records network events during the page load, then derives per-page
  results: data savings, Via-header validity, proxy bypass, and
  safebrowsing behavior.
  """

  def __init__(self):
    super(ChromeProxyTimelineMetrics, self).__init__()

    # Whether to add detailed result for each sub-resource in a page.
    self.add_result_for_resource = False
    self._events = None

  def Start(self, page, tab):
    self._events = None
    opts = recording_options.TimelineRecordingOptions()
    opts.record_network = True
    tab.StartTimelineRecording(opts)

  def Stop(self, page, tab):
    # Events are fetched lazily in IterResponses; none should exist yet.
    assert self._events is None
    tab.StopTimelineRecording()

  def AddResults(self, tab, results):
    raise NotImplementedError

  def IterResponses(self, tab):
    """Yields a ChromeProxyResponse for each HTTPResponse timeline event."""
    if self._events is None:
      self._events = tab.timeline_model.GetAllEventsOfName('HTTPResponse')
    for e in self._events:
      yield ChromeProxyResponse(e)

  def AddResultsForDataSaving(self, tab, results):
    """Adds content-length and data-saving results for the page load."""
    resources_via_proxy = 0
    resources_from_cache = 0
    resources_other = 0
    content_length = 0
    original_content_length = 0

    for resp in self.IterResponses(tab):
      if resp.response.served_from_cache:
        # Cached responses transfer no bytes; count and skip them.
        resources_from_cache += 1
        continue
      if ChromeProxyResponse.HasChromeProxyViaHeader(resp.response):
        resources_via_proxy += 1
      else:
        resources_other += 1

      resource = resp.response.url
      resource_signature = resp.url_signature
      cl = resp.content_length
      if resp.has_original_content_length:
        ocl = resp.original_content_length
        if ocl < cl:
          logging.warning('original content length (%d) is less than content '
                          'length (%d) for resource %s', ocl, cl, resource)
        if self.add_result_for_resource:
          results.Add('resource_data_saving_' + resource_signature,
                      'percent', resp.data_saving_rate * 100)
          results.Add('resource_original_content_length_' + resource_signature,
                      'bytes', ocl)
        original_content_length += ocl
      else:
        # No X-Original-Content-Length header; assume no savings.
        original_content_length += cl
      if self.add_result_for_resource:
        results.Add('resource_content_length_' + resource_signature,
                    'bytes', cl)
      content_length += cl

    results.Add('resources_via_proxy', 'count', resources_via_proxy)
    results.Add('resources_from_cache', 'count', resources_from_cache)
    results.Add('resources_other', 'count', resources_other)
    results.Add('content_length', 'bytes', content_length)
    results.Add('original_content_length', 'bytes', original_content_length)
    if (original_content_length > 0 and
        original_content_length >= content_length):
      saving = (float(original_content_length - content_length) /
                original_content_length * 100)
      results.Add('data_saving', 'percent', saving)
    else:
      results.Add('data_saving', 'percent', 0.0)

  def AddResultsForHeaderValidation(self, tab, results):
    """Checks every response has a valid Via header.

    Raises:
      ChromeProxyMetricException: on the first response whose Via header
          is missing or malformed.
    """
    via_count = 0
    for resp in self.IterResponses(tab):
      if resp.IsValidByViaHeader():
        via_count += 1
      else:
        r = resp.response
        raise ChromeProxyMetricException(
            '%s: Via header (%s) is not valid (referer=%s, status=%d)' % (
                r.url, r.GetHeader('Via'), r.GetHeader('Referer'), r.status))
    results.Add('checked_via_header', 'count', via_count)

  def AddResultsForBypass(self, tab, results):
    """Checks no response went through the proxy (bypass case).

    Raises:
      ChromeProxyMetricException: on the first response that carries the
          proxy's Via header.
    """
    bypass_count = 0
    for resp in self.IterResponses(tab):
      r = resp.response
      if ChromeProxyResponse.HasChromeProxyViaHeader(r):
        raise ChromeProxyMetricException(
            '%s: Should not have Via header (%s) (referer=%s, status=%d)' % (
                r.url, r.GetHeader('Via'), r.GetHeader('Referer'), r.status))
      bypass_count += 1
    results.Add('bypass_count', 'count', bypass_count)

  def AddResultsForSafebrowsing(self, tab, results):
    """Checks every response is a safebrowsing warning redirect.

    Raises:
      ChromeProxyMetricException: on the first response that is not a
          valid safebrowsing response.
    """
    count = 0
    safebrowsing_count = 0
    for resp in self.IterResponses(tab):
      count += 1
      if resp.IsSafebrowsingResponse():
        safebrowsing_count += 1
      else:
        r = resp.response
        raise ChromeProxyMetricException(
            '%s: Not a valid safe browsing response.\n'
            'Response: status=(%d, %s)\nHeaders:\n %s' % (
                r.url, r.status, r.status_text, r.headers))
    if count == safebrowsing_count:
      results.Add('safebrowsing', 'boolean', True)
    else:
      raise ChromeProxyMetricException(
          'Safebrowsing failed (count=%d, safebrowsing_count=%d)\n' % (
              count, safebrowsing_count))

Powered by Google App Engine
This is Rietveld 408576698