tracing/tracing/metrics/compare_samples.html - Issue 2089833002: Entry point for bisect sample comparison.

Side by Side Diff: tracing/tracing/metrics/compare_samples.html

Issue 2089833002: Entry point for bisect sample comparison. (Closed) Base URL: https://github.com/catapult-project/catapult.git@mann

Patch Set: Removed stray line. Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 <!DOCTYPE html>

	2 <!--

	3 Copyright 2016 The Chromium Authors. All rights reserved.

	4 Use of this source code is governed by a BSD-style license that can be

	5 found in the LICENSE file.

	6 -->

	7

	8 <link rel="import" href="/tracing/base/iteration_helpers.html">

	9 <link rel="import" href="/tracing/base/statistics.html">

	10 <link rel="import" href="/tracing/base/xhr.html">

	11 <link rel="import" href="/tracing/value/value_set.html">

	12

	13 <script>

	14 'use strict';

	15

	16 tr.exportTo('tr.metrics', function() {
	nednguyen 2016/09/12 17:13:32: You also don't need this export. | RobertoCN 2016/09/19 20:07:43: Done. | nednguyen 2016/09/19 20:14:41: Did you forget to upload your new patch?
	17 var escapeChars = s => s.replace(/[\:\|=\/#&,]/g, '_');

	18

	19 function findUnescapedKey(escaped, d) {

	20 for (var k of tr.b.dictionaryKeys(d))

	21 if (escapeChars(k) === escaped)

	22 return k;

	23 throw new Error('did not find key ' + escaped + ' in ' +

	24 tr.b.dictionaryKeys(d));

	25 }

	26

	27 function geoMeanFromHistogram(h) {

	28 if (!h.hasOwnProperty('buckets'))

	29 return 0.0;

	30 var count = 0;

	31 var sumOfLogs = 0;

	32 for (var bucket of h.buckets) {

	33 if (bucket.hasOwnProperty('high'))

	34 bucket.mean = (bucket.low + bucket.high) / 2.0;

	35 else

	36 bucket.mean = bucket.low;

	37

	38 if (bucket.mean > 0) {

	39 sumOfLogs += Math.log(bucket.mean) * bucket.count;

	40 count += bucket.count;

	41 }

	42 }

	43 if (count === 0)

	44 return 0.0;

	45 return Math.exp(sumOfLogs / count);

	46 }

	47

	48 function splitMetric(metricName) {

	49 var parts = metricName.split('/');

	50 var interactionName;

	51 var traceName = 'summary';

	52 var chartName = parts[0];

	53 if (parts.length === 3) {

	54 // parts[1] is the interactionName

	55 if (parts[1])

	56 chartName = parts[1] + '@@' + chartName;

	57 traceName = parts[2];

	58 } else if (parts.length === 2) {

	59 if (chartName !== parts[1])

	60 traceName = parts[1];

	61 } else

	62 throw new Error('Could not parse metric name.');

	63 return [chartName, traceName];

	64 }

	65

	66 function valuesFromCharts(listOfCharts, metricName) {

	67 var all_values = [];

	68 var chartAndTrace = splitMetric(metricName);

	69 for (var charts of listOfCharts) {

	70 var chartName = findUnescapedKey(chartAndTrace[0], charts.charts);

	71 if (chartName) {

	72 var traceName = findUnescapedKey(

	73 chartAndTrace[1], charts.charts[chartName]);

	74 if (traceName) {

	75 if (charts.charts[chartName][traceName].type ===

	76 'list_of_scalar_values')

	77 all_values.push(...charts.charts[chartName][traceName].values);

	78 if (charts.charts[chartName][traceName].type === 'histogram')

	79 all_values.push(

	80 geoMeanFromHistogram(charts.charts[chartName][traceName]));

	81 }

	82 }

	83 }

	84 return all_values;

	85 }

	86

	87 function rawValuesByMetricName(valueSet, metricName) {

	88 var interactionRecord, valueName, story;

	89 var metricNameParts = metricName.split('/');

	90 if (metricNameParts[0] === metricNameParts[1])

	91 story = 'summary';

	92 else

	93 story = metricNameParts[1];

	94 var chartNameParts = metricNameParts[0].split('-');

	95 valueName = chartNameParts[1];

	96 if (chartNameParts.length === 2)

	97 interactionRecord = chartNameParts[0];

	98 var values = valueSet.getValuesWithName(valueName);

	99 if (!values \|\| values.length === 0) {

	100 // If there was a dash in the chart name, but it wasn't an

	101 // interaction record.

	102 valueName = metricNameParts[0];

	103 values = valueSet.getValuesWithName(valueName);

	104 interactionRecord = undefined;

	105 if (!values \|\| values.length === 0)

	106 throw new Error('No values with name ' + valueName);

	107 }

	108 var filtered = [];

	109 for (var value of values) {

	110 if (value.name !== valueName)

	111 continue;

	112 var ii = tr.v.d.IterationInfo.getFromValue(value);

	113 if (interactionRecord) {

	114 var IRParts = [];

	115 var keys = Object.keys(ii.storyGroupingKeys);

	116 keys.sort();

	117 for (var key of keys)

	118 IRParts.push(ii.storyGroupingKeys[key]);

	119 if (interactionRecord === IRParts.join('_') &&

	120 escapeChars(ii.storyDisplayName) ===

	121 escapeChars(story))

	122 filtered.push(value);

	123 } else if (escapeChars(ii.storyDisplayName) ===

	124 escapeChars(story))

	125 filtered.push(value);

	126 }

	127

	128 var rawValues = [];

	129 for (var val of filtered) {

	130 if (val.numeric instanceof tr.v.Numeric)

	131 rawValues = rawValues.concat(val.numeric.sampleValues);

	132 else if (val.numeric instanceof tr.v.ScalarNumeric)

	133 rawValues.push(val.numeric.value);

	134 }

	135 return rawValues;

	136 }

	137

	138 function parseFiles(files) {

	139 var results = [];

	140 for (var path of files) {

	141 try {

	142 var current = tr.b.getSync('file://' + path);

	143 } catch (ex) {

	144 var err = new Error('Could not open' + path);

	145 err.name = 'File loading error';

	146 throw err;

	147 }

	148 results.push(JSON.parse(current));

	149 }

	150 return results;

	151 }

	152

	153 var escapeForRegExp = s => s.replace(/[-\/\\^$*+?.()\|[\]{}]/g, '\\$&');

	154

	155 var strFromRE = re => re.toString().split('/')[1];

	156

	157 function valuesFromBuildbotOutput(out, metric) {

	158 if (!out)

	159 return [];

	160 var stringVals = [];

	161 var floatVals = [];

	162 var chartAndTrace = splitMetric(metric);

	163 var metricRE = escapeForRegExp(

	164 'RESULT ' + chartAndTrace[0] + ': ' + chartAndTrace[1] + '=');

	165 var singleResultRE = new RegExp(metricRE +

	166 strFromRE(/\s*([-]?[\d\.]+)/), 'g');

	167 var multiResultsRE = new RegExp(metricRE +

	168 strFromRE(/\s\[\s([\d\., -]+)\s*\]/), 'g');

	169 var meanStdDevRE = new RegExp(metricRE +

	170 strFromRE(/\s\{\s([-]?\d(?:\.\d)?),\s([-]?\d(?:\.\d*)?)\}/), 'g');

	171 for (var line of out.split(/\r?\n/)) {

	172 var singleResultMatch = singleResultRE.exec(line);

	173 var multiResultsMatch = multiResultsRE.exec(line);

	174 var meanStdDevMatch = meanStdDevRE.exec(line);

	175 if (singleResultMatch && singleResultMatch.length > 1)

	176 stringVals.push(singleResultMatch[1]);

	177 else if (multiResultsMatch && multiResultsMatch.length > 1) {

	178 var values = multiResultsMatch[1].split(',');

	179 stringVals = stringVals.concat(values);

	180 } else if (meanStdDevMatch && meanStdDevMatch.length > 1)

	181 stringVals.push(meanStdDevMatch[1]);

	182 }

	183 for (var val of stringVals) {

	184 var f = parseFloat(val);

	185 if (!isNaN(f))

	186 floatVals.push(f);

	187 }

	188 return floatVals;

	189 }

	190

	191 function parseMultipleBuildbotStreams(files, metric) {

	192 var allValues = [];

	193 for (var path of files) {

	194 try {

	195 var contents = tr.b.getSync('file://' + path);

	196 }

	197 catch (ex) {

	198 var err = new Error('Could not open' + path);

	199 err.name = 'File loading error';

	200 throw err;

	201 }

	202 allValues = allValues.concat(valuesFromBuildbotOutput(contents, metric));

	203 }

	204 return allValues;

	205 }

	206

	207 var BisectComparison = {
	dtu 2016/09/15 21:31:56 The word "bisect" isn't used anywhere else. Probab The word "bisect" isn't used anywhere else. Probably rename this function. RobertoCN 2016/09/19 20:07:42 Done. Show quoted text On 2016/09/15 21:31:56, dtu wrote: > The word "bisect" isn't used anywhere else. Probably rename this function. Done.
	208 ENOUGH_SAMPLES: 18,

	209 SIGNIFICANCE_LEVEL: 0.05,
	dtu 2016/09/15 21:31:56 Seems like bisect is currently using 0.01, so I gu Seems like bisect is currently using 0.01, so I guess we're gonna say that results2 will use 0.01 for now, too. RobertoCN 2016/09/19 20:07:43 Done. Show quoted text On 2016/09/15 21:31:56, dtu wrote: > Seems like bisect is currently using 0.01, so I guess we're gonna say that > results2 will use 0.01 for now, too. Done.
	210

	211 compareBuildbotOutputs: function(
	dtu 2016/09/15 21:31:56 I like these composed functions. Very easy to read I like these composed functions. Very easy to read. RobertoCN 2016/09/19 20:07:43 Acknowledged. Show quoted text On 2016/09/15 21:31:56, dtu wrote: > I like these composed functions. Very easy to read. Acknowledged.
	212 buildbotOutputAPathList, buildbotOutputBPathList, metric) {

	213 var aPaths = buildbotOutputAPathList.split(',');

	214 var bPaths = buildbotOutputBPathList.split(',');

	215 var sampleA = parseMultipleBuildbotStreams(aPaths, metric);

	216 var sampleB = parseMultipleBuildbotStreams(bPaths, metric);

	217 return this.compareSamples(sampleA, sampleB);

	218 },

	219

	220 compareValuesets: function(valueSetAPathList, valueSetBPathList, metric) {

	221 var aPaths = valueSetAPathList.split(',');

	222 var bPaths = valueSetBPathList.split(',');

	223 var valueSetA = new tr.v.ValueSet();

	224 var valueSetB = new tr.v.ValueSet();

	225 var dictsA = parseFiles(aPaths);

	226 var dictsB = parseFiles(bPaths);

	227 for (var d of dictsA)

	228 valueSetA.addValuesFromDicts(d);

	229 for (var d of dictsB)

	230 valueSetB.addValuesFromDicts(d);

	231

	232 var sampleA = rawValuesByMetricName(valueSetA, metric);

	233 var sampleB = rawValuesByMetricName(valueSetB, metric);

	234 return this.compareSamples(sampleA, sampleB);

	235 },

	236

	237 compareCharts: function(chartPathListA, chartPathListB, metric) {

	238 var aPaths = chartPathListA.split(',');

	239 var bPaths = chartPathListB.split(',');

	240 var chartsA = parseFiles(aPaths);

	241 var chartsB = parseFiles(bPaths);

	242 var sampleA = valuesFromCharts(chartsA, metric);

	243 var sampleB = valuesFromCharts(chartsB, metric);

	244 return this.compareSamples(sampleA, sampleB);

	245 },

	246

	247 compareSamples: function(sampleA, sampleB) {

	248 var pValue = tr.b.Statistics.mwu.test(sampleA, sampleB);

	249 // Diagnostics

	250 var summaryStats = sample => ({

	251 std_dev: tr.b.Statistics.stddev(sample),

	252 mean: tr.b.Statistics.mean(sample),
	dtu 2016/09/15 21:35:41 Question for Ben and Ethan: are you going to be us Question for Ben and Ethan: are you going to be using these summary stats? From the bisect side it looks weird, because we're plumbing these summary statistics through bisect, when it's easier to store only the raw values and calculate statistics as needed for display in the UI. And what if we decide to pick different statistics? e.g. z-score, relative/absolute deltas, etc. benjhayden 2016/09/21 06:33:25 No, I don't think that results2.html will be using Show quoted text On 2016/09/15 at 21:35:41, dtu wrote: > Question for Ben and Ethan: are you going to be using these summary stats? From the bisect side it looks weird, because we're plumbing these summary statistics through bisect, when it's easier to store only the raw values and calculate statistics as needed for display in the UI. And what if we decide to pick different statistics? e.g. z-score, relative/absolute deltas, etc. No, I don't think that results2.html will be using these. As you say, they will probably be computed by the UI like histogram_span is doing in this CL: https://codereview.chromium.org/2341623002
	253 debug_values: sample

	254 });

	255 var result = {

	256 sample_a: summaryStats(sampleA),

	257 sample_b: summaryStats(sampleB),

	258 pValue: pValue.p,

	259 UStatistic: pValue.U,
	dtu 2016/09/15 21:31:56 Why is the U-statistic useful? Why is the U-statistic useful? RobertoCN 2016/09/19 20:07:42 Maybe it's not. I decided to surface it because it Show quoted text On 2016/09/15 21:31:56, dtu wrote: > Why is the U-statistic useful? Maybe it's not. I decided to surface it because it intuitively makes more sense to me than the p value alone. If you compare two samples of size 10 U statistic will be some int between 0 and (10*10) [or half of that depending on sidedness, I guess] where 0 is clearly no overlap between the samples, and 50 is a perfect tie. This seems a little useful when looking at 'purportedly inaccurate' bisects.
	260 result: 'needMoreData',

	261 };

	262 if (pValue.p < this.SIGNIFICANCE_LEVEL)

	263 result.result = true; // Reject the null
	dtu 2016/09/15 21:31:56 Don't mix types. Make them all string constants, I Don't mix types. Make them all string constants, I guess. RobertoCN 2016/09/19 20:07:43 Done. Show quoted text On 2016/09/15 21:31:56, dtu wrote: > Don't mix types. Make them all string constants, I guess. Done.
	264 else if (sampleA.length > this.ENOUGH_SAMPLES &&

	265 sampleB.length > this.ENOUGH_SAMPLES)

	266 result.result = false; // Fail to reject the null.

	267 return result;

	268 }

	269 };

	270

	271 return {

	272 BisectComparison: BisectComparison

	273 };

	274 });

	275 </script>

OLD	NEW

« no previous file with comments | « tracing/bin/compare_samples ('k') | tracing/tracing/metrics/compare_samples.py » ('j') | tracing/tracing/metrics/compare_samples.py » ('J')