OLD | NEW |
---|---|
(Empty) | |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | |
tonyg
2014/03/12 02:08:37
2014
bolian
2014/03/19 00:20:03
Done.
| |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 import base64 | |
5 import logging | |
tonyg
2014/03/12 02:08:37
nit: alphabetize
bolian
2014/03/19 00:20:03
Done.
| |
6 import gzip | |
7 import hashlib | |
8 | |
9 from io import BytesIO | |
10 from metrics import Metric | |
11 from metrics import loading | |
12 from telemetry.page import page_measurement | |
13 # All chrome_proxy metrics are Chrome only. | |
bengr
2014/03/12 18:17:36
Is there a check somewhere that Chrome is being te
bolian
2014/03/19 00:20:03
No. But an exception will be thrown if a non-Chrom
| |
14 from telemetry.core.backends.chrome import inspector_network | |
15 from telemetry.core.timeline import recording_options | |
16 | |
17 | |
class ChromeProxyMetricException(page_measurement.MeasurementFailure):
  """Raised when a Chrome proxy metric check fails during measurement."""
  pass
20 | |
21 | |
class ChromeProxyLatency(Metric):
  """Page-load latency metrics based on window.performance.

  NOTE(review): reviewers suggested removing this metric and folding its
  results into a more general loading/network metric -- TODO confirm.
  """

  def __init__(self):
    super(ChromeProxyLatency, self).__init__()
27 | |
28 def Start(self, page, tab): | |
29 raise NotImplementedError() | |
30 | |
31 def Stop(self, page, tab): | |
32 raise NotImplementedError() | |
33 | |
34 def AddResults(self, tab, results): | |
35 loading.LoadingMetric().AddResults(tab, results) | |
36 | |
37 load_timings = tab.EvaluateJavaScript('window.performance.timing') | |
38 # Navigation timing segments. Named after corresponding histograms. | |
39 # See chrome/renderer/page_load_histograms.cc. | |
40 nt_delay_before_fetch = (float(load_timings['fetchStart']) - | |
41 load_timings['navigationStart']) | |
42 results.Add('nt_delay_before_fetch', 'ms', nt_delay_before_fetch) | |
43 | |
44 nt_delay_before_request = (float(load_timings['requestStart']) - | |
45 load_timings['navigationStart']) | |
46 results.Add('nt_delay_before_request', 'ms', nt_delay_before_request) | |
47 | |
48 nt_domain_lookup = (float(load_timings['domainLookupEnd']) - | |
49 load_timings['domainLookupStart']) | |
50 results.Add('nt_domain_lookup', 'ms', nt_domain_lookup) | |
51 | |
52 nt_connect = (float(load_timings['connectEnd']) - | |
53 load_timings['connectStart']) | |
54 results.Add('nt_connect', 'ms', nt_connect) | |
55 | |
56 nt_request = (float(load_timings['responseStart']) - | |
57 load_timings['requestStart']) | |
58 results.Add('nt_request', 'ms', nt_request) | |
59 | |
60 nt_response = (float(load_timings['responseEnd']) - | |
61 load_timings['responseStart']) | |
62 results.Add('nt_response', 'ms', nt_response) | |
63 | |
64 | |
# Via header values that identify the Chrome data compression proxy:
# the current product name and the legacy full Via value.
_CHROME_PROXY_VIA_HEADER = 'Chrome-Compression-Proxy'
_CHROME_PROXY_VIA_HEADER_OLD = '1.1 Chrome Compression Proxy'
67 | |
68 | |
class ChromeProxyResponse(object):
  """Represents an HTTP response extracted from a timeline event."""

  def __init__(self, event):
    self._response = (
        inspector_network.InspectorNetworkResponseData.FromTimelineEvent(event))
    # Lazily computed and cached by the content_length property.
    self._content_length = None
75 | |
76 @property | |
77 def response(self): | |
78 return self._response | |
79 | |
80 @property | |
81 def url_signature(self): | |
82 return hashlib.md5(self.response.url).hexdigest() | |
83 | |
84 @property | |
85 def content_length(self): | |
86 if self._content_length is None: | |
87 self._content_length = self.GetContentLength(self.response) | |
88 return self._content_length | |
89 | |
90 @property | |
91 def has_original_content_length(self): | |
92 return 'X-Original-Content-Length' in self.response.headers | |
93 | |
94 @property | |
95 def original_content_length(self): | |
96 if self.has_original_content_length: | |
97 return int(self.response.GetHeader('X-Original-Content-Length')) | |
98 return 0 | |
99 | |
100 @property | |
101 def data_saving_rate(self): | |
102 if (not self.has_original_content_length or | |
103 self.original_content_length <= 0): | |
104 return 0.0 | |
105 return (float(self.original_content_length - self.content_length) / | |
106 self.original_content_length) | |
107 | |
108 @staticmethod | |
109 def GetGizppedBodyLength(body): | |
110 if not body: | |
111 return 0 | |
112 bio = BytesIO() | |
113 try: | |
114 with gzip.GzipFile(fileobj=bio, mode="wb") as f: | |
115 f.write(body.encode('utf-8')) | |
116 except Exception, e: | |
117 logging.warning('Fail to gzip response body: %s', e) | |
118 raise e | |
119 return len(bio.getvalue()) | |
120 | |
121 @staticmethod | |
122 def ShouldGzipContent(content_type): | |
123 """Returns True if we need to gzip the content.""" | |
124 if not content_type: | |
125 return False | |
126 if 'text/' in content_type: | |
127 return True | |
128 if ('application/' in content_type and | |
129 ('javascript' in content_type or 'json' in content_type)): | |
130 return True | |
131 return False | |
132 | |
133 @staticmethod | |
134 def GetContentLengthFromBody(body, base64_encoded, content_type): | |
135 if not body: | |
136 return 0 | |
137 if base64_encoded: | |
138 decoded = base64.b64decode(body) | |
bengr
2014/03/12 18:48:27
Why do you decode a base64 encoded body?
bolian
2014/03/19 00:20:03
Added comments for that. The binary body (like tha
| |
139 return len(decoded) | |
140 else: | |
141 # Use gzipped content length if we can gzip the body based on | |
142 # Content-Type and the gzipped length is less than body length. | |
143 if ChromeProxyResponse.ShouldGzipContent(content_type): | |
bengr
2014/03/12 18:48:27
I don't understand. What if it is possible to gzip
bolian
2014/03/19 00:20:03
Fixed and added comments.
The logic now is that if
| |
144 gzipped = ChromeProxyResponse.GetGizppedBodyLength(body) | |
145 return gzipped if gzipped <= len(body) else len(body) | |
146 else: | |
147 return len(body) | |
148 | |
149 @staticmethod | |
150 def GetContentLength(resp): | |
151 cl = 0 | |
152 body, base64_encoded = resp.GetBody() | |
153 try: | |
154 cl = ChromeProxyResponse.GetContentLengthFromBody( | |
155 body, base64_encoded, resp.GetHeader('Content-Type')) | |
156 except Exception, e: | |
157 logging.warning('Fail to get content length for %s from body: %s', | |
158 resp.url[:100], e) | |
159 cl_header = resp.GetHeader('Content-Length') | |
160 if cl_header: | |
161 cl = int(cl_header) | |
162 elif body: | |
163 cl = len(body) | |
164 return cl | |
165 | |
166 @staticmethod | |
167 def ShouldHaveChromeProxyViaHeader(resp): | |
168 # Ignore https and data url | |
169 if resp.url.startswith('https') or resp.url.startswith('data:'): | |
bengr
2014/03/12 18:48:27
This reminds me. We should have an integration tes
bolian
2014/03/19 00:20:03
I think cached resource has Via header and "data"
| |
170 return False | |
171 # Ignore 304 Not Modified. | |
172 if resp.status == 304: | |
173 return False | |
174 return True | |
175 | |
176 @staticmethod | |
177 def HasChromeProxyViaHeader(resp): | |
178 via_header = resp.GetHeader('Via') | |
179 if not via_header: | |
180 return False | |
181 vias = [v.strip(' ') for v in via_header.split(',')] | |
182 # The Via header is valid if it is the old format or the new format | |
183 # with 4-character version prefix, for example, | |
184 # "1.1 Chrome-Compression-Proxy". | |
185 return (_CHROME_PROXY_VIA_HEADER_OLD in vias or | |
186 any(v[4:] == _CHROME_PROXY_VIA_HEADER for v in vias)) | |
187 | |
188 def IsValidByViaHeader(self): | |
bengr
2014/03/12 18:48:27
I don't know why you need a function like this one
bolian
2014/03/19 00:20:03
Yes, I am using this to tell whether Chrome proxy
| |
189 return (not self.ShouldHaveChromeProxyViaHeader(self.response) or | |
190 self.HasChromeProxyViaHeader(self.response)) | |
191 | |
192 def IsSafebrowsingResponse(self): | |
193 if (self.response.status == 307 and | |
194 self.response.GetHeader('X-Malware-Url') == '1' and | |
195 self.IsValidByViaHeader() and | |
196 self.response.GetHeader('Location') == self.response.url): | |
197 return True | |
198 return False | |
199 | |
200 | |
class ChromeProxyTimelineMetrics(Metric):
  """Chrome proxy metrics computed from recorded timeline network events."""

  def __init__(self):
    super(ChromeProxyTimelineMetrics, self).__init__()
    # When True, emit a detailed result for every sub-resource in a page
    # in addition to the page-level aggregates.
    self.add_result_for_resource = False
    # Cached timeline events; populated lazily by IterResponses().
    self._events = None
210 | |
211 def Start(self, page, tab): | |
212 self._events = None | |
213 opts = recording_options.TimelineRecordingOptions() | |
214 opts.record_network = True | |
215 tab.StartTimelineRecording(opts) | |
216 | |
217 def Stop(self, page, tab): | |
218 assert self._events is None | |
219 tab.StopTimelineRecording() | |
220 | |
221 def AddResults(self, tab, results): | |
222 raise NotImplementedError | |
223 | |
224 def IterResponses(self, tab): | |
225 if self._events is None: | |
226 self._events = tab.timeline_model.GetAllEventsOfName('HTTPResponse') | |
227 if len(self._events) == 0: | |
228 return | |
229 for e in self._events: | |
230 yield ChromeProxyResponse(e) | |
231 | |
232 def AddResultsForDataSaving(self, tab, results): | |
233 resources_via_proxy = 0 | |
234 resources_from_cache = 0 | |
235 resources_other = 0 | |
236 content_length = 0 | |
237 original_content_length = 0 | |
238 | |
239 for resp in self.IterResponses(tab): | |
240 if resp.response.served_from_cache: | |
241 resources_from_cache += 1 | |
242 continue | |
243 if ChromeProxyResponse.HasChromeProxyViaHeader(resp.response): | |
244 resources_via_proxy += 1 | |
245 else: | |
246 resources_other += 1 | |
247 | |
248 resource = resp.response.url | |
249 resource_signature = resp.url_signature | |
250 cl = resp.content_length | |
251 if resp.has_original_content_length: | |
252 ocl = resp.original_content_length | |
253 if ocl < cl: | |
254 logging.warning('original content length (%d) is less than content ' | |
255 'lenght(%d) for resource %s', ocl, cl, resource) | |
256 if self.add_result_for_resource: | |
257 results.Add('resource_data_saving_' + resource_signature, | |
258 'percent', resp.data_saving_rate * 100) | |
259 results.Add('resource_original_content_length_' + resource_signature, | |
260 'bytes', ocl) | |
261 original_content_length += ocl | |
262 else: | |
263 original_content_length += cl | |
264 if self.add_result_for_resource: | |
265 results.Add('resource_content_length_' + resource_signature, | |
266 'bytes', cl) | |
267 content_length += cl | |
268 | |
269 results.Add('resources_via_proxy', 'count', resources_via_proxy) | |
270 results.Add('resources_from_cache', 'count', resources_from_cache) | |
271 results.Add('resources_other', 'count', resources_other) | |
272 results.Add('content_length', 'bytes', content_length) | |
273 results.Add('original_content_length', 'bytes', original_content_length) | |
tonyg
2014/03/12 02:08:37
Let's generalize this a bit and have a Network(Met
bolian
2014/03/19 00:20:03
Done. Added a new network metric.
| |
274 if (original_content_length > 0 and | |
275 original_content_length >= content_length): | |
276 saving = (float(original_content_length-content_length) / | |
277 original_content_length * 100) | |
278 results.Add('data_saving', 'percent', saving) | |
279 else: | |
280 results.Add('data_saving', 'percent', 0.0) | |
281 | |
282 def AddResultsForHeaderValidation(self, tab, results): | |
283 via_count = 0 | |
284 for resp in self.IterResponses(tab): | |
285 if resp.IsValidByViaHeader(): | |
286 via_count += 1 | |
287 else: | |
288 r = resp.response | |
289 raise ChromeProxyMetricException, ( | |
290 '%s: Via header (%s) is not valid (refer=%s, status=%d)' % ( | |
291 r.url, r.GetHeader('Via'), r.GetHeader('Referer'), r.status)) | |
292 results.Add('checked_via_header', 'count', via_count) | |
293 | |
294 def AddResultsForBypass(self, tab, results): | |
295 bypass_count = 0 | |
296 for resp in self.IterResponses(tab): | |
297 r = resp.response | |
298 if ChromeProxyResponse.HasChromeProxyViaHeader(r): | |
299 raise ChromeProxyMetricException, ( | |
300 '%s: Should not have Via header (%s) (refer=%s, status=%d)' % ( | |
301 r.url, r.GetHeader('Via'), r.GetHeader('Referer'), r.status)) | |
302 bypass_count += 1 | |
303 results.Add('bypass_count', 'count', bypass_count) | |
304 | |
305 def AddResultsForSafebrowsing(self, tab, results): | |
306 count = 0 | |
307 safebrowsing_count = 0 | |
308 for resp in self.IterResponses(tab): | |
309 count += 1 | |
310 if resp.IsSafebrowsingResponse(): | |
311 safebrowsing_count += 1 | |
312 else: | |
313 r = resp.response | |
314 raise ChromeProxyMetricException, ( | |
315 '%s: Not a valid safe browsing response.\n' | |
316 'Reponse: status=(%d, %s)\nHeaders:\n %s' % ( | |
317 r.url, r.status, r.status_text, r.headers)) | |
318 if count == safebrowsing_count: | |
319 results.Add('safebrowsing', 'boolean', True) | |
320 else: | |
321 raise ChromeProxyMetricException, ( | |
322 'Safebrowsing failed (count=%d, safebrowsing_count=%d)\n' % ( | |
323 count, safebrowsing_count)) | |
OLD | NEW |