OLD | NEW |
| (Empty) |
1 <!DOCTYPE html> | |
2 <!-- | |
3 Copyright 2014 The Chromium Authors. All rights reserved. | |
4 Use of this source code is governed by a BSD-style license that can be | |
5 found in the LICENSE file. | |
6 --> | |
7 | |
8 <link rel="import" href="/tracing/base/math.html"> | |
9 <link rel="import" href="/tracing/base/range.html"> | |
10 <script src="/mannwhitneyu/mannwhitneyu.js"></script> | |
11 | |
12 <script> | |
13 'use strict'; | |
14 | |
15 // In node, the script-src for mannwhitneyu above brings in mannwhitneyui | |
16 // into a module, instead of into the global scope. Whereas this file | |
17 // assumes that mannwhitneyu is in the global scope. So, in Node only, we | |
18 // require() it in, and then take all its exports and shove them into the | |
19 // global scope by hand. | |
20 (function(global) { | |
21 if (tr.isNode) { | |
22 var mwuAbsPath = HTMLImportsLoader.hrefToAbsolutePath( | |
23 '/mannwhitneyu.js'); | |
24 var mwuModule = require(mwuAbsPath); | |
25 for (var exportName in mwuModule) { | |
26 global[exportName] = mwuModule[exportName]; | |
27 } | |
28 } | |
29 })(this); | |
30 </script> | |
31 | |
32 <script> | |
33 'use strict'; | |
34 | |
35 // TODO(charliea): Remove: | |
36 /* eslint-disable catapult-camelcase */ | |
37 | |
38 tr.exportTo('tr.b', function() { | |
39 var identity = x => x; | |
40 | |
41 var Statistics = {}; | |
42 | |
43 /* Returns the quotient, or zero if the denominator is zero.*/ | |
44 Statistics.divideIfPossibleOrZero = function(numerator, denominator) { | |
45 if (denominator === 0) | |
46 return 0; | |
47 return numerator / denominator; | |
48 }; | |
49 | |
50 Statistics.sum = function(ary, opt_func, opt_this) { | |
51 var func = opt_func || identity; | |
52 var ret = 0; | |
53 var i = 0; | |
54 for (var elt of ary) | |
55 ret += func.call(opt_this, elt, i++); | |
56 return ret; | |
57 }; | |
58 | |
59 Statistics.mean = function(ary, opt_func, opt_this) { | |
60 var func = opt_func || identity; | |
61 var sum = 0; | |
62 var i = 0; | |
63 | |
64 for (var elt of ary) | |
65 sum += func.call(opt_this, elt, i++); | |
66 | |
67 if (i === 0) | |
68 return undefined; | |
69 | |
70 return sum / i; | |
71 }; | |
72 | |
73 Statistics.geometricMean = function(ary, opt_func, opt_this) { | |
74 var func = opt_func || identity; | |
75 var i = 0; | |
76 var logsum = 0; | |
77 | |
78 // The geometric mean is expressed as the arithmetic mean of logarithms | |
79 // in order to prevent overflow. | |
80 for (var elt of ary) { | |
81 var x = func.call(opt_this, elt, i++); | |
82 if (x <= 0) | |
83 return 0; | |
84 logsum += Math.log(Math.abs(x)); | |
85 } | |
86 | |
87 if (i === 0) | |
88 return 1; | |
89 | |
90 return Math.exp(logsum / i); | |
91 }; | |
92 | |
93 // Returns undefined if the sum of the weights is zero. | |
94 Statistics.weightedMean = function( | |
95 ary, weightCallback, opt_valueCallback, opt_this) { | |
96 var valueCallback = opt_valueCallback || identity; | |
97 var numerator = 0; | |
98 var denominator = 0; | |
99 var i = -1; | |
100 | |
101 for (var elt of ary) { | |
102 i++; | |
103 var value = valueCallback.call(opt_this, elt, i); | |
104 if (value === undefined) | |
105 continue; | |
106 var weight = weightCallback.call(opt_this, elt, i, value); | |
107 numerator += weight * value; | |
108 denominator += weight; | |
109 } | |
110 | |
111 if (denominator === 0) | |
112 return undefined; | |
113 | |
114 return numerator / denominator; | |
115 }; | |
116 | |
117 Statistics.variance = function(ary, opt_func, opt_this) { | |
118 if (ary.length === 0) | |
119 return undefined; | |
120 if (ary.length === 1) | |
121 return 0; | |
122 var func = opt_func || identity; | |
123 var mean = Statistics.mean(ary, func, opt_this); | |
124 var sumOfSquaredDistances = Statistics.sum( | |
125 ary, | |
126 function(d, i) { | |
127 var v = func.call(this, d, i) - mean; | |
128 return v * v; | |
129 }, | |
130 opt_this); | |
131 return sumOfSquaredDistances / (ary.length - 1); | |
132 }; | |
133 | |
134 Statistics.stddev = function(ary, opt_func, opt_this) { | |
135 if (ary.length === 0) | |
136 return undefined; | |
137 return Math.sqrt( | |
138 Statistics.variance(ary, opt_func, opt_this)); | |
139 }; | |
140 | |
141 Statistics.max = function(ary, opt_func, opt_this) { | |
142 var func = opt_func || identity; | |
143 var ret = -Infinity; | |
144 var i = 0; | |
145 for (var elt of ary) | |
146 ret = Math.max(ret, func.call(opt_this, elt, i++)); | |
147 return ret; | |
148 }; | |
149 | |
150 Statistics.min = function(ary, opt_func, opt_this) { | |
151 var func = opt_func || identity; | |
152 var ret = Infinity; | |
153 var i = 0; | |
154 for (var elt of ary) | |
155 ret = Math.min(ret, func.call(opt_this, elt, i++)); | |
156 return ret; | |
157 }; | |
158 | |
159 Statistics.range = function(ary, opt_func, opt_this) { | |
160 var func = opt_func || identity; | |
161 var ret = new tr.b.Range(); | |
162 var i = 0; | |
163 for (var elt of ary) | |
164 ret.addValue(func.call(opt_this, elt, i++)); | |
165 return ret; | |
166 }; | |
167 | |
168 Statistics.percentile = function(ary, percent, opt_func, opt_this) { | |
169 if (!(percent >= 0 && percent <= 1)) | |
170 throw new Error('percent must be [0,1]'); | |
171 | |
172 var func = opt_func || identity; | |
173 var tmp = new Array(ary.length); | |
174 var i = 0; | |
175 for (var elt of ary) | |
176 tmp[i] = func.call(opt_this, elt, i++); | |
177 tmp.sort((a, b) => a - b); | |
178 var idx = Math.floor((ary.length - 1) * percent); | |
179 return tmp[idx]; | |
180 }; | |
181 | |
182 /** | |
183 * Sorts the samples, and map them linearly to the range [0,1]. | |
184 * | |
185 * They're mapped such that for the N samples, the first sample is 0.5/N and | |
186 * the last sample is (N-0.5)/N. | |
187 * | |
188 * Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is | |
189 * 2/N, twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In | |
190 * our case we don't want to distinguish between these two cases, as our | |
191 * original domain is not bounded (it is for Monte Carlo integration, where | |
192 * discrepancy was first used). | |
193 **/ | |
194 Statistics.normalizeSamples = function(samples) { | |
195 if (samples.length === 0) { | |
196 return { | |
197 normalized_samples: samples, | |
198 scale: 1.0 | |
199 }; | |
200 } | |
201 // Create a copy to make sure that we don't mutate original |samples| input. | |
202 samples = samples.slice().sort( | |
203 function(a, b) { | |
204 return a - b; | |
205 } | |
206 ); | |
207 var low = Math.min.apply(null, samples); | |
208 var high = Math.max.apply(null, samples); | |
209 var newLow = 0.5 / samples.length; | |
210 var newHigh = (samples.length - 0.5) / samples.length; | |
211 if (high - low === 0.0) { | |
212 // Samples is an array of 0.5 in this case. | |
213 samples = Array.apply(null, new Array(samples.length)).map( | |
214 function() { return 0.5;}); | |
215 return { | |
216 normalized_samples: samples, | |
217 scale: 1.0 | |
218 }; | |
219 } | |
220 var scale = (newHigh - newLow) / (high - low); | |
221 for (var i = 0; i < samples.length; i++) { | |
222 samples[i] = (samples[i] - low) * scale + newLow; | |
223 } | |
224 return { | |
225 normalized_samples: samples, | |
226 scale: scale | |
227 }; | |
228 }; | |
229 | |
230 /** | |
231 * Computes the discrepancy of a set of 1D samples from the interval [0,1]. | |
232 * | |
233 * The samples must be sorted. We define the discrepancy of an empty set | |
234 * of samples to be zero. | |
235 * | |
236 * http://en.wikipedia.org/wiki/Low-discrepancy_sequence | |
237 * http://mathworld.wolfram.com/Discrepancy.html | |
238 */ | |
239 Statistics.discrepancy = function(samples, opt_locationCount) { | |
240 if (samples.length === 0) | |
241 return 0.0; | |
242 | |
243 var maxLocalDiscrepancy = 0; | |
244 var invSampleCount = 1.0 / samples.length; | |
245 var locations = []; | |
246 // For each location, stores the number of samples less than that location. | |
247 var countLess = []; | |
248 // For each location, stores the number of samples less than or equal to | |
249 // that location. | |
250 var countLessEqual = []; | |
251 | |
252 if (opt_locationCount !== undefined) { | |
253 // Generate list of equally spaced locations. | |
254 var sampleIndex = 0; | |
255 for (var i = 0; i < opt_locationCount; i++) { | |
256 var location = i / (opt_locationCount - 1); | |
257 locations.push(location); | |
258 while (sampleIndex < samples.length && | |
259 samples[sampleIndex] < location) { | |
260 sampleIndex += 1; | |
261 } | |
262 countLess.push(sampleIndex); | |
263 while (sampleIndex < samples.length && | |
264 samples[sampleIndex] <= location) { | |
265 sampleIndex += 1; | |
266 } | |
267 countLessEqual.push(sampleIndex); | |
268 } | |
269 } else { | |
270 // Populate locations with sample positions. Append 0 and 1 if necessary. | |
271 if (samples[0] > 0.0) { | |
272 locations.push(0.0); | |
273 countLess.push(0); | |
274 countLessEqual.push(0); | |
275 } | |
276 for (var i = 0; i < samples.length; i++) { | |
277 locations.push(samples[i]); | |
278 countLess.push(i); | |
279 countLessEqual.push(i + 1); | |
280 } | |
281 if (samples[-1] < 1.0) { | |
282 locations.push(1.0); | |
283 countLess.push(samples.length); | |
284 countLessEqual.push(samples.length); | |
285 } | |
286 } | |
287 | |
288 // Compute discrepancy as max(overshoot, -undershoot), where | |
289 // overshoot = max(countClosed(i, j)/N - length(i, j)) for all i < j, | |
290 // undershoot = min(countOpen(i, j)/N - length(i, j)) for all i < j, | |
291 // N = len(samples), | |
292 // countClosed(i, j) is the number of points between i and j | |
293 // including ends, | |
294 // countOpen(i, j) is the number of points between i and j excluding ends, | |
295 // length(i, j) is locations[i] - locations[j]. | |
296 | |
297 // The following algorithm is modification of Kadane's algorithm, | |
298 // see https://en.wikipedia.org/wiki/Maximum_subarray_problem. | |
299 | |
300 // The maximum of (countClosed(k, i-1)/N - length(k, i-1)) for any k < i-1. | |
301 var maxDiff = 0; | |
302 // The minimum of (countOpen(k, i-1)/N - length(k, i-1)) for any k < i-1. | |
303 var minDiff = 0; | |
304 for (var i = 1; i < locations.length; i++) { | |
305 var length = locations[i] - locations[i - 1]; | |
306 var countClosed = countLessEqual[i] - countLess[i - 1]; | |
307 var countOpen = countLess[i] - countLessEqual[i - 1]; | |
308 // Number of points that are added if we extend a closed range that | |
309 // ends at location (i-1). | |
310 var countClosedIncrement = | |
311 countLessEqual[i] - countLessEqual[i - 1]; | |
312 // Number of points that are added if we extend an open range that | |
313 // ends at location (i-1). | |
314 var countOpenIncrement = countLess[i] - countLess[i - 1]; | |
315 | |
316 // Either extend the previous optimal range or start a new one. | |
317 maxDiff = Math.max( | |
318 countClosedIncrement * invSampleCount - length + maxDiff, | |
319 countClosed * invSampleCount - length); | |
320 minDiff = Math.min( | |
321 countOpenIncrement * invSampleCount - length + minDiff, | |
322 countOpen * invSampleCount - length); | |
323 | |
324 maxLocalDiscrepancy = Math.max( | |
325 maxDiff, -minDiff, maxLocalDiscrepancy); | |
326 } | |
327 return maxLocalDiscrepancy; | |
328 }; | |
329 | |
330 /** | |
331 * A discrepancy based metric for measuring timestamp jank. | |
332 * | |
333 * timestampsDiscrepancy quantifies the largest area of jank observed in a | |
334 * series of timestamps. Note that this is different from metrics based on | |
335 * the max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] | |
336 * and B = [0,1,2,3,5,7] have the same max_time_interval = 2, but | |
337 * Discrepancy(B) > Discrepancy(A). | |
338 * | |
339 * Two variants of discrepancy can be computed: | |
340 * | |
341 * Relative discrepancy is following the original definition of | |
342 * discrepancy. It characterized the largest area of jank, relative to the | |
343 * duration of the entire time stamp series. We normalize the raw results, | |
344 * because the best case discrepancy for a set of N samples is 1/N (for | |
345 * equally spaced samples), and we want our metric to report 0.0 in that | |
346 * case. | |
347 * | |
348 * Absolute discrepancy also characterizes the largest area of jank, but its | |
349 * value wouldn't change (except for imprecisions due to a low | |
350 * |interval_multiplier|) if additional 'good' intervals were added to an | |
351 * exisiting list of time stamps. Its range is [0,inf] and the unit is | |
352 * milliseconds. | |
353 * | |
354 * The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same | |
355 * absolute discrepancy, but D has lower relative discrepancy than C. | |
356 * | |
357 * |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each | |
358 * S_i is a time stamp series. In that case, the discrepancy D(S) is: | |
359 * D(S) = max(D(S_1), D(S_2), ..., D(S_N)) | |
360 **/ | |
361 Statistics.timestampsDiscrepancy = function(timestamps, opt_absolute, | |
362 opt_locationCount) { | |
363 if (timestamps.length === 0) | |
364 return 0.0; | |
365 | |
366 if (opt_absolute === undefined) | |
367 opt_absolute = true; | |
368 | |
369 if (Array.isArray(timestamps[0])) { | |
370 var rangeDiscrepancies = timestamps.map(function(r) { | |
371 return Statistics.timestampsDiscrepancy(r); | |
372 }); | |
373 return Math.max.apply(null, rangeDiscrepancies); | |
374 } | |
375 | |
376 var s = Statistics.normalizeSamples(timestamps); | |
377 var samples = s.normalized_samples; | |
378 var sampleScale = s.scale; | |
379 var discrepancy = Statistics.discrepancy(samples, opt_locationCount); | |
380 var invSampleCount = 1.0 / samples.length; | |
381 if (opt_absolute === true) { | |
382 // Compute absolute discrepancy | |
383 discrepancy /= sampleScale; | |
384 } else { | |
385 // Compute relative discrepancy | |
386 discrepancy = tr.b.clamp( | |
387 (discrepancy - invSampleCount) / (1.0 - invSampleCount), 0.0, 1.0); | |
388 } | |
389 return discrepancy; | |
390 }; | |
391 | |
392 /** | |
393 * A discrepancy based metric for measuring duration jank. | |
394 * | |
395 * DurationsDiscrepancy computes a jank metric which measures how irregular a | |
396 * given sequence of intervals is. In order to minimize jank, each duration | |
397 * should be equally long. This is similar to how timestamp jank works, | |
398 * and we therefore reuse the timestamp discrepancy function above to compute | |
399 * a similar duration discrepancy number. | |
400 * | |
401 * Because timestamp discrepancy is defined in terms of timestamps, we first | |
402 * convert the list of durations to monotonically increasing timestamps. | |
403 * | |
404 * Args: | |
405 * durations: List of interval lengths in milliseconds. | |
406 * absolute: See TimestampsDiscrepancy. | |
407 * opt_locationCount: See TimestampsDiscrepancy. | |
408 **/ | |
409 Statistics.durationsDiscrepancy = function( | |
410 durations, opt_absolute, opt_locationCount) { | |
411 if (durations.length === 0) | |
412 return 0.0; | |
413 | |
414 var timestamps = durations.reduce(function(prev, curr, index, array) { | |
415 prev.push(prev[prev.length - 1] + curr); | |
416 return prev; | |
417 }, [0]); | |
418 return Statistics.timestampsDiscrepancy( | |
419 timestamps, opt_absolute, opt_locationCount); | |
420 }; | |
421 | |
422 /** | |
423 * Modifies |samples| in-place to reduce its length down to |count|. | |
424 * | |
425 * @param {!Array} samples | |
426 * @param {number} count | |
427 * @return {!Array} | |
428 */ | |
429 Statistics.uniformlySampleArray = function(samples, count) { | |
430 if (samples.length <= count) { | |
431 return samples; | |
432 } | |
433 while (samples.length > count) { | |
434 var i = parseInt(Math.random() * samples.length); | |
435 samples.splice(i, 1); | |
436 } | |
437 return samples; | |
438 }; | |
439 | |
440 /** | |
441 * A mechanism to uniformly sample elements from an arbitrary long stream. | |
442 * | |
443 * Call this method every time a new element is obtained from the stream, | |
444 * passing always the same |samples| array and the |numSamples| you desire. | |
445 * Also pass in the current |streamLength|, which is the same as the index of | |
446 * |newElement| within that stream. | |
447 * | |
448 * The |samples| array will possibly be updated, replacing one of its element | |
449 * with |newElements|. The length of |samples| will not be more than | |
450 * |numSamples|. | |
451 * | |
452 * This method guarantees that after |streamLength| elements have been | |
453 * processed each one has equal probability of being in |samples|. The order | |
454 * of samples is not preserved though. | |
455 * | |
456 * Args: | |
457 * samples: Array of elements that have already been selected. Start with []. | |
458 * streamLength: The current length of the stream, up to |newElement|. | |
459 * newElement: The element that was just extracted from the stream. | |
460 * numSamples: The total number of samples desired. | |
461 **/ | |
462 Statistics.uniformlySampleStream = function(samples, streamLength, newElement, | |
463 numSamples) { | |
464 if (streamLength <= numSamples) { | |
465 if (samples.length >= streamLength) | |
466 samples[streamLength - 1] = newElement; | |
467 else | |
468 samples.push(newElement); | |
469 return; | |
470 } | |
471 | |
472 var probToKeep = numSamples / streamLength; | |
473 if (Math.random() > probToKeep) | |
474 return; // New sample was rejected. | |
475 | |
476 // Keeping it, replace an alement randomly. | |
477 var index = Math.floor(Math.random() * numSamples); | |
478 samples[index] = newElement; | |
479 }; | |
480 | |
481 /** | |
482 * A mechanism to merge two arrays of uniformly sampled elements in a way that | |
483 * ensures elements in the final array are still sampled uniformly. | |
484 * | |
485 * This works similarly to sampleStreamUniform. The |samplesA| array will be | |
486 * updated, some of its elements replaced by elements from |samplesB| in a | |
487 * way that ensure that elements will be sampled uniformly. | |
488 * | |
489 * Args: | |
490 * samplesA: Array of uniformly sampled elements, will be updated. | |
491 * streamLengthA: The length of the stream from which |samplesA| was sampled. | |
492 * samplesB: Other array of uniformly sampled elements, will NOT be updated. | |
493 * streamLengthB: The length of the stream from which |samplesB| was sampled. | |
494 * numSamples: The total number of samples desired, both in |samplesA| and | |
495 * |samplesB|. | |
496 **/ | |
497 Statistics.mergeSampledStreams = function( | |
498 samplesA, streamLengthA, | |
499 samplesB, streamLengthB, numSamples) { | |
500 if (streamLengthB < numSamples) { | |
501 // samplesB has not reached max capacity so every sample of stream B were | |
502 // chosen with certainty. Add them one by one into samplesA. | |
503 var nbElements = Math.min(streamLengthB, samplesB.length); | |
504 for (var i = 0; i < nbElements; ++i) { | |
505 Statistics.uniformlySampleStream(samplesA, streamLengthA + i + 1, | |
506 samplesB[i], numSamples); | |
507 } | |
508 return; | |
509 } | |
510 if (streamLengthA < numSamples) { | |
511 // samplesA has not reached max capacity so every sample of stream A were | |
512 // chosen with certainty. Add them one by one into samplesB. | |
513 var nbElements = Math.min(streamLengthA, samplesA.length); | |
514 var tempSamples = samplesB.slice(); | |
515 for (var i = 0; i < nbElements; ++i) { | |
516 Statistics.uniformlySampleStream(tempSamples, streamLengthB + i + 1, | |
517 samplesA[i], numSamples); | |
518 } | |
519 // Copy that back into the first vector. | |
520 for (var i = 0; i < tempSamples.length; ++i) { | |
521 samplesA[i] = tempSamples[i]; | |
522 } | |
523 return; | |
524 } | |
525 | |
526 // Both sample arrays are at max capacity, use the power of maths! | |
527 // Elements in samplesA have been selected with probability | |
528 // numSamples / streamLengthA. Same for samplesB. For each index of the | |
529 // array we keep samplesA[i] with probability | |
530 // P = streamLengthA / (streamLengthA + streamLengthB) | |
531 // and replace it with samplesB[i] with probability 1-P. | |
532 // The total probability of keeping it is therefore | |
533 // numSamples / streamLengthA * | |
534 // streamLengthA / (streamLengthA + streamLengthB) | |
535 // = numSamples / (streamLengthA + streamLengthB) | |
536 // A similar computation shows we have the same probability of keeping any | |
537 // element in samplesB. Magic! | |
538 var nbElements = Math.min(numSamples, samplesB.length); | |
539 var probOfSwapping = streamLengthB / (streamLengthA + streamLengthB); | |
540 for (var i = 0; i < nbElements; ++i) { | |
541 if (Math.random() < probOfSwapping) { | |
542 samplesA[i] = samplesB[i]; | |
543 } | |
544 } | |
545 }; | |
546 | |
547 /* Continuous distributions are defined by probability density functions. | |
548 * | |
549 * Random variables are referred to by capital letters: X, Y, Z. | |
550 * Particular values from these distributions are referred to by lowercase | |
551 * letters like |x|. | |
552 * The probability that |X| ever exactly equals |x| is P(X==x) = 0. | |
553 * | |
554 * For a discrete probability distribution, see tr.v.Histogram. | |
555 */ | |
556 function Distribution() { | |
557 } | |
558 | |
559 Distribution.prototype = { | |
560 /* The probability density of the random variable at value |x| is the | |
561 * relative likelihood for this random variable to take on the given value | |
562 * |x|. | |
563 * | |
564 * @param {number} x A value from the random distribution. | |
565 * @return {number} probability density at x. | |
566 */ | |
567 computeDensity: function(x) { | |
568 throw Error('Not implemented'); | |
569 }, | |
570 | |
571 /* A percentile is the probability that a sample from the distribution is | |
572 * less than the given value |x|. This function is monotonically increasing. | |
573 * | |
574 * @param {number} x A value from the random distribution. | |
575 * @return {number} P(X<x). | |
576 */ | |
577 computePercentile: function(x) { | |
578 throw Error('Not implemented'); | |
579 }, | |
580 | |
581 /* A complementary percentile is the probability that a sample from the | |
582 * distribution is greater than the given value |x|. This function is | |
583 * monotonically decreasing. | |
584 * | |
585 * @param {number} x A value from the random distribution. | |
586 * @return {number} P(X>x). | |
587 */ | |
588 computeComplementaryPercentile: function(x) { | |
589 return 1 - this.computePercentile(x); | |
590 }, | |
591 | |
592 /* Compute the mean of the probability distribution. | |
593 * | |
594 * @return {number} mean. | |
595 */ | |
596 get mean() { | |
597 throw Error('Not implemented'); | |
598 }, | |
599 | |
600 /* The mode of a distribution is the most likely value. | |
601 * The maximum of the computeDensity() function is at this mode. | |
602 * @return {number} mode. | |
603 */ | |
604 get mode() { | |
605 throw Error('Not implemented'); | |
606 }, | |
607 | |
608 /* The median is the center value of the distribution. | |
609 * computePercentile(median) = computeComplementaryPercentile(median) = 0.5 | |
610 * | |
611 * @return {number} median. | |
612 */ | |
613 get median() { | |
614 throw Error('Not implemented'); | |
615 }, | |
616 | |
617 /* The standard deviation is a measure of how dispersed or spread out the | |
618 * distribution is (this statistic has the same units as the values). | |
619 * | |
620 * @return {number} standard deviation. | |
621 */ | |
622 get standardDeviation() { | |
623 throw Error('Not implemented'); | |
624 }, | |
625 | |
626 /* An alternative measure of how spread out the distribution is, | |
627 * the variance is the square of the standard deviation. | |
628 * @return {number} variance. | |
629 */ | |
630 get variance() { | |
631 throw Error('Not implemented'); | |
632 } | |
633 }; | |
634 | |
635 Statistics.UniformDistribution = function(opt_range) { | |
636 if (!opt_range) | |
637 opt_range = tr.b.Range.fromExplicitRange(0, 1); | |
638 this.range = opt_range; | |
639 }; | |
640 | |
641 Statistics.UniformDistribution.prototype = { | |
642 __proto__: Distribution.prototype, | |
643 | |
644 computeDensity: function(x) { | |
645 return 1 / this.range.range; | |
646 }, | |
647 | |
648 computePercentile: function(x) { | |
649 return tr.b.normalize(x, this.range.min, this.range.max); | |
650 }, | |
651 | |
652 get mean() { | |
653 return this.range.center; | |
654 }, | |
655 | |
656 get mode() { | |
657 return undefined; | |
658 }, | |
659 | |
660 get median() { | |
661 return this.mean; | |
662 }, | |
663 | |
664 get standardDeviation() { | |
665 return Math.sqrt(this.variance); | |
666 }, | |
667 | |
668 get variance() { | |
669 return Math.pow(this.range.range, 2) / 12; | |
670 } | |
671 }; | |
672 | |
673 /* The Normal or Gaussian distribution, or bell curve, is common in complex | |
674 * processes such as are found in many of the natural sciences. If Z is the | |
675 * standard normal distribution with mean = 0 and variance = 1, then the | |
676 * general normal distribution is Y = mean + Z*sqrt(variance). | |
677 * https://www.desmos.com/calculator/tqtbjm4s3z | |
678 */ | |
679 Statistics.NormalDistribution = function(opt_mean, opt_variance) { | |
680 this.mean_ = opt_mean || 0; | |
681 this.variance_ = opt_variance || 1; | |
682 this.standardDeviation_ = Math.sqrt(this.variance_); | |
683 }; | |
684 | |
685 Statistics.NormalDistribution.prototype = { | |
686 __proto__: Distribution.prototype, | |
687 | |
688 computeDensity: function(x) { | |
689 var scale = (1.0 / (this.standardDeviation * Math.sqrt(2.0 * Math.PI))); | |
690 var exponent = -Math.pow(x - this.mean, 2) / (2.0 * this.variance); | |
691 return scale * Math.exp(exponent); | |
692 }, | |
693 | |
694 computePercentile: function(x) { | |
695 var standardizedX = ((x - this.mean) / | |
696 Math.sqrt(2.0 * this.variance)); | |
697 return (1.0 + tr.b.erf(standardizedX)) / 2.0; | |
698 }, | |
699 | |
700 get mean() { | |
701 return this.mean_; | |
702 }, | |
703 | |
704 get median() { | |
705 return this.mean; | |
706 }, | |
707 | |
708 get mode() { | |
709 return this.mean; | |
710 }, | |
711 | |
712 get standardDeviation() { | |
713 return this.standardDeviation_; | |
714 }, | |
715 | |
716 get variance() { | |
717 return this.variance_; | |
718 } | |
719 }; | |
720 | |
721 /* The log-normal distribution is a continuous probability distribution of a | |
722 * random variable whose logarithm is normally distributed. | |
723 * If Y is the general normal distribution, then X = exp(Y) is the general | |
724 * log-normal distribution. | |
725 * X will have different parameters from Y, | |
726 * so the mean of Y is called the "location" of X, | |
727 * and the standard deviation of Y is called the "shape" of X. | |
728 * The standard lognormal distribution exp(Z) has location = 0 and shape = 1. | |
729 * https://www.desmos.com/calculator/tqtbjm4s3z | |
730 */ | |
731 Statistics.LogNormalDistribution = function(opt_location, opt_shape) { | |
732 this.normalDistribution_ = new Statistics.NormalDistribution( | |
733 opt_location, Math.pow(opt_shape || 1, 2)); | |
734 }; | |
735 | |
736 Statistics.LogNormalDistribution.prototype = { | |
737 __proto__: Statistics.NormalDistribution.prototype, | |
738 | |
739 computeDensity: function(x) { | |
740 return this.normalDistribution_.computeDensity(Math.log(x)) / x; | |
741 }, | |
742 | |
743 computePercentile: function(x) { | |
744 return this.normalDistribution_.computePercentile(Math.log(x)); | |
745 }, | |
746 | |
747 get mean() { | |
748 return Math.exp(this.normalDistribution_.mean + | |
749 (this.normalDistribution_.variance / 2)); | |
750 }, | |
751 | |
752 get variance() { | |
753 var nm = this.normalDistribution_.mean; | |
754 var nv = this.normalDistribution_.variance; | |
755 return (Math.exp(2 * (nm + nv)) - | |
756 Math.exp(2 * nm + nv)); | |
757 }, | |
758 | |
759 get standardDeviation() { | |
760 return Math.sqrt(this.variance); | |
761 }, | |
762 | |
763 get median() { | |
764 return Math.exp(this.normalDistribution_.mean); | |
765 }, | |
766 | |
767 get mode() { | |
768 return Math.exp(this.normalDistribution_.mean - | |
769 this.normalDistribution_.variance); | |
770 } | |
771 }; | |
772 | |
773 /** | |
774 * Instead of describing a LogNormalDistribution in terms of its "location" | |
775 * and "shape", it can also be described in terms of its median | |
776 * and the point at which its complementary cumulative distribution | |
777 * function bends between the linear-ish region in the middle and the | |
778 * exponential-ish region. When the distribution is used to compute | |
779 * percentiles for log-normal random processes such as latency, as the latency | |
780 * improves, it hits a point of diminishing returns, when it becomes | |
781 * relatively difficult to improve the score further. This point of | |
782 * diminishing returns is the first x-intercept of the third derivative of the | |
783 * CDF, which is the second derivative of the PDF. | |
784 * | |
785 * https://www.desmos.com/calculator/cg5rnftabn | |
786 * | |
787 * @param {number} median The median of the distribution. | |
788 * @param {number} diminishingReturns The point of diminishing returns. | |
789 * @return {LogNormalDistribution} | |
790 */ | |
791 Statistics.LogNormalDistribution.fromMedianAndDiminishingReturns = | |
792 function(median, diminishingReturns) { | |
793 diminishingReturns = Math.log(diminishingReturns / median); | |
794 var shape = Math.sqrt(1 - 3 * diminishingReturns - | |
795 Math.sqrt(Math.pow(diminishingReturns - 3, 2) - 8)) / 2; | |
796 var location = Math.log(median); | |
797 return new Statistics.LogNormalDistribution(location, shape); | |
798 }; | |
799 | |
800 // p-values less than this indicate statistical significance. | |
801 Statistics.DEFAULT_ALPHA = 0.01; | |
802 | |
803 // If a statistical significant difference has not been established with | |
804 // this many observations per sample, we'll assume none exists. | |
805 Statistics.MAX_SUGGESTED_SAMPLE_SIZE = 20; | |
806 | |
807 /** @enum */ | |
808 Statistics.Significance = { | |
809 SIGNIFICANT: 'REJECT', | |
810 INSIGNIFICANT: 'FAIL_TO_REJECT', | |
811 NEED_MORE_DATA: 'NEED_MORE_DATA', | |
812 DONT_CARE: 'DONT_CARE', | |
813 }; | |
814 | |
815 | |
816 /** | |
817 * @typedef {Object} HypothesisTestResult | |
818 * @property {number} p | |
819 * @property {number} U | |
820 * @property {!tr.b.Statistics.Significance} significance | |
821 */ | |
822 | |
823 /** | |
824 * @param {!Array.<number>} a | |
825 * @param {!Array.<number>} b | |
826 * @param {number=} opt_alpha | |
827 * @param {number=} opt_reqSampleSize | |
828 * @return {!HypothesisTestResult} | |
829 */ | |
830 Statistics.mwu = function(a, b, opt_alpha, opt_reqSampleSize) { | |
831 var result = mannwhitneyu.test(a, b); | |
832 var alpha = opt_alpha || Statistics.DEFAULT_ALPHA; | |
833 if (result.p < alpha) { | |
834 result.significance = Statistics.Significance.SIGNIFICANT; | |
835 } else if (opt_reqSampleSize && (a.length < opt_reqSampleSize || | |
836 b.length < opt_reqSampleSize)) { | |
837 result.significance = Statistics.Significance.NEED_MORE_DATA; | |
838 } else { | |
839 result.significance = Statistics.Significance.INSIGNIFICANT; | |
840 } | |
841 return result; | |
842 }; | |
843 | |
844 return { | |
845 Statistics, | |
846 }; | |
847 }); | |
848 </script> | |
OLD | NEW |