Chromium Code Reviews

Side by Side Diff: tracing/tracing/base/statistics.html

Issue 2771723003: [tracing] Move math utilities from base into their own subdirectory (attempt 2) (Closed)
Patch Set: rebase Created 3 years, 9 months ago
1 <!DOCTYPE html>
2 <!--
3 Copyright 2014 The Chromium Authors. All rights reserved.
4 Use of this source code is governed by a BSD-style license that can be
5 found in the LICENSE file.
6 -->
7
8 <link rel="import" href="/tracing/base/math.html">
9 <link rel="import" href="/tracing/base/range.html">
10 <script src="/mannwhitneyu/mannwhitneyu.js"></script>
11
12 <script>
13 'use strict';
14
15 // In Node, the script-src for mannwhitneyu above brings mannwhitneyu
16 // into a module instead of into the global scope, whereas this file
17 // assumes that mannwhitneyu is in the global scope. So, in Node only, we
18 // require() it in, and then copy all of its exports into the global
19 // scope by hand.
20 (function(global) {
21 if (tr.isNode) {
22 var mwuAbsPath = HTMLImportsLoader.hrefToAbsolutePath(
23 '/mannwhitneyu.js');
24 var mwuModule = require(mwuAbsPath);
25 for (var exportName in mwuModule) {
26 global[exportName] = mwuModule[exportName];
27 }
28 }
29 })(this);
30 </script>
31
32 <script>
33 'use strict';
34
35 // TODO(charliea): Remove:
36 /* eslint-disable catapult-camelcase */
37
38 tr.exportTo('tr.b', function() {
39 var identity = x => x;
40
41 var Statistics = {};
42
43 /* Returns the quotient, or zero if the denominator is zero. */
44 Statistics.divideIfPossibleOrZero = function(numerator, denominator) {
45 if (denominator === 0)
46 return 0;
47 return numerator / denominator;
48 };
49
50 Statistics.sum = function(ary, opt_func, opt_this) {
51 var func = opt_func || identity;
52 var ret = 0;
53 var i = 0;
54 for (var elt of ary)
55 ret += func.call(opt_this, elt, i++);
56 return ret;
57 };
58
59 Statistics.mean = function(ary, opt_func, opt_this) {
60 var func = opt_func || identity;
61 var sum = 0;
62 var i = 0;
63
64 for (var elt of ary)
65 sum += func.call(opt_this, elt, i++);
66
67 if (i === 0)
68 return undefined;
69
70 return sum / i;
71 };
72
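// Usage sketch (illustrative values only, not part of the reviewed patch):
// sum() and mean() both accept an optional mapping callback that receives
// (element, index), and mean() of an empty array is undefined rather than NaN.
var sketchDurations = [2, 4, 6, 8];
Statistics.sum(sketchDurations);               // 20
Statistics.mean(sketchDurations);              // 5
Statistics.sum(sketchDurations, x => x * x);   // 4 + 16 + 36 + 64 = 120
Statistics.mean([]);                           // undefined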
73 Statistics.geometricMean = function(ary, opt_func, opt_this) {
74 var func = opt_func || identity;
75 var i = 0;
76 var logsum = 0;
77
78 // The geometric mean is expressed as the arithmetic mean of logarithms
79 // in order to prevent overflow.
80 for (var elt of ary) {
81 var x = func.call(opt_this, elt, i++);
82 if (x <= 0)
83 return 0;
84 logsum += Math.log(Math.abs(x));
85 }
86
87 if (i === 0)
88 return 1;
89
90 return Math.exp(logsum / i);
91 };
92
93 // Returns undefined if the sum of the weights is zero.
94 Statistics.weightedMean = function(
95 ary, weightCallback, opt_valueCallback, opt_this) {
96 var valueCallback = opt_valueCallback || identity;
97 var numerator = 0;
98 var denominator = 0;
99 var i = -1;
100
101 for (var elt of ary) {
102 i++;
103 var value = valueCallback.call(opt_this, elt, i);
104 if (value === undefined)
105 continue;
106 var weight = weightCallback.call(opt_this, elt, i, value);
107 numerator += weight * value;
108 denominator += weight;
109 }
110
111 if (denominator === 0)
112 return undefined;
113
114 return numerator / denominator;
115 };
116
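// Usage sketch (illustrative values only, not part of the reviewed patch):
// weightedMean() skips entries whose value callback returns undefined and
// returns undefined when the total weight is zero.
var sketchSamples = [{value: 10, weight: 1}, {value: 20, weight: 3}];
Statistics.weightedMean(
    sketchSamples, s => s.weight, s => s.value);  // (10*1 + 20*3) / 4 = 17.5
Statistics.weightedMean([1, 2, 3], () => 0);      // undefined (zero weight)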
117 Statistics.variance = function(ary, opt_func, opt_this) {
118 if (ary.length === 0)
119 return undefined;
120 if (ary.length === 1)
121 return 0;
122 var func = opt_func || identity;
123 var mean = Statistics.mean(ary, func, opt_this);
124 var sumOfSquaredDistances = Statistics.sum(
125 ary,
126 function(d, i) {
127 var v = func.call(this, d, i) - mean;
128 return v * v;
129 },
130 opt_this);
131 return sumOfSquaredDistances / (ary.length - 1);
132 };
133
134 Statistics.stddev = function(ary, opt_func, opt_this) {
135 if (ary.length === 0)
136 return undefined;
137 return Math.sqrt(
138 Statistics.variance(ary, opt_func, opt_this));
139 };
140
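// Usage sketch (illustrative values only, not part of the reviewed patch):
// variance() is the sample variance (divisor N - 1), so a single element has
// variance 0 and an empty array has no defined spread.
Statistics.variance([2, 4, 4, 4, 5, 5, 7, 9]);  // 32 / 7, about 4.571
Statistics.stddev([2, 4, 4, 4, 5, 5, 7, 9]);    // about 2.138
Statistics.variance([42]);                      // 0
Statistics.stddev([]);                          // undefined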
141 Statistics.max = function(ary, opt_func, opt_this) {
142 var func = opt_func || identity;
143 var ret = -Infinity;
144 var i = 0;
145 for (var elt of ary)
146 ret = Math.max(ret, func.call(opt_this, elt, i++));
147 return ret;
148 };
149
150 Statistics.min = function(ary, opt_func, opt_this) {
151 var func = opt_func || identity;
152 var ret = Infinity;
153 var i = 0;
154 for (var elt of ary)
155 ret = Math.min(ret, func.call(opt_this, elt, i++));
156 return ret;
157 };
158
159 Statistics.range = function(ary, opt_func, opt_this) {
160 var func = opt_func || identity;
161 var ret = new tr.b.Range();
162 var i = 0;
163 for (var elt of ary)
164 ret.addValue(func.call(opt_this, elt, i++));
165 return ret;
166 };
167
168 Statistics.percentile = function(ary, percent, opt_func, opt_this) {
169 if (!(percent >= 0 && percent <= 1))
170 throw new Error('percent must be in [0,1]');
171
172 var func = opt_func || identity;
173 var tmp = new Array(ary.length);
174 var i = 0;
175 for (var elt of ary)
176 tmp[i] = func.call(opt_this, elt, i++);
177 tmp.sort((a, b) => a - b);
178 var idx = Math.floor((ary.length - 1) * percent);
179 return tmp[idx];
180 };
181
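// Usage sketch (illustrative values only, not part of the reviewed patch):
// percentile() takes a fraction in [0, 1] (not 0-100) and floors the index,
// so it behaves like a nearest-rank percentile rather than an interpolated one.
var sketchValues = [15, 20, 35, 40, 50];
Statistics.percentile(sketchValues, 0.5);  // 35
Statistics.percentile(sketchValues, 0.9);  // 40 (index is floor(4 * 0.9) === 3)
Statistics.percentile(sketchValues, 1.0);  // 50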
182 /**
183 * Sorts the samples and maps them linearly to the range [0,1].
184 *
185 * They're mapped such that for the N samples, the first sample is 0.5/N and
186 * the last sample is (N-0.5)/N.
187 *
188 * Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is
189 * 2/N, twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In
190 * our case we don't want to distinguish between these two cases, as our
191 * original domain is not bounded (it is for Monte Carlo integration, where
192 * discrepancy was first used).
193 **/
194 Statistics.normalizeSamples = function(samples) {
195 if (samples.length === 0) {
196 return {
197 normalized_samples: samples,
198 scale: 1.0
199 };
200 }
201 // Create a copy to make sure that we don't mutate the original |samples|.
202 samples = samples.slice().sort(
203 function(a, b) {
204 return a - b;
205 }
206 );
207 var low = Math.min.apply(null, samples);
208 var high = Math.max.apply(null, samples);
209 var newLow = 0.5 / samples.length;
210 var newHigh = (samples.length - 0.5) / samples.length;
211 if (high - low === 0.0) {
212 // In this case |samples| becomes an array filled with 0.5.
213 samples = Array.apply(null, new Array(samples.length)).map(
214 function() { return 0.5;});
215 return {
216 normalized_samples: samples,
217 scale: 1.0
218 };
219 }
220 var scale = (newHigh - newLow) / (high - low);
221 for (var i = 0; i < samples.length; i++) {
222 samples[i] = (samples[i] - low) * scale + newLow;
223 }
224 return {
225 normalized_samples: samples,
226 scale: scale
227 };
228 };
229
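// Usage sketch (illustrative values only, not part of the reviewed patch):
// N evenly spaced timestamps map onto (i + 0.5) / N, while identical samples
// collapse to 0.5 with scale 1.
Statistics.normalizeSamples([10, 20, 30]);
// => normalized_samples approximately [1/6, 1/2, 5/6], scale approximately 1/30
Statistics.normalizeSamples([7, 7, 7]);
// => normalized_samples [0.5, 0.5, 0.5], scale 1.0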
230 /**
231 * Computes the discrepancy of a set of 1D samples from the interval [0,1].
232 *
233 * The samples must be sorted. We define the discrepancy of an empty set
234 * of samples to be zero.
235 *
236 * http://en.wikipedia.org/wiki/Low-discrepancy_sequence
237 * http://mathworld.wolfram.com/Discrepancy.html
238 */
239 Statistics.discrepancy = function(samples, opt_locationCount) {
240 if (samples.length === 0)
241 return 0.0;
242
243 var maxLocalDiscrepancy = 0;
244 var invSampleCount = 1.0 / samples.length;
245 var locations = [];
246 // For each location, stores the number of samples less than that location.
247 var countLess = [];
248 // For each location, stores the number of samples less than or equal to
249 // that location.
250 var countLessEqual = [];
251
252 if (opt_locationCount !== undefined) {
253 // Generate list of equally spaced locations.
254 var sampleIndex = 0;
255 for (var i = 0; i < opt_locationCount; i++) {
256 var location = i / (opt_locationCount - 1);
257 locations.push(location);
258 while (sampleIndex < samples.length &&
259 samples[sampleIndex] < location) {
260 sampleIndex += 1;
261 }
262 countLess.push(sampleIndex);
263 while (sampleIndex < samples.length &&
264 samples[sampleIndex] <= location) {
265 sampleIndex += 1;
266 }
267 countLessEqual.push(sampleIndex);
268 }
269 } else {
270 // Populate locations with sample positions. Append 0 and 1 if necessary.
271 if (samples[0] > 0.0) {
272 locations.push(0.0);
273 countLess.push(0);
274 countLessEqual.push(0);
275 }
276 for (var i = 0; i < samples.length; i++) {
277 locations.push(samples[i]);
278 countLess.push(i);
279 countLessEqual.push(i + 1);
280 }
281 if (samples[samples.length - 1] < 1.0) {
282 locations.push(1.0);
283 countLess.push(samples.length);
284 countLessEqual.push(samples.length);
285 }
286 }
287
288 // Compute discrepancy as max(overshoot, -undershoot), where
289 // overshoot = max(countClosed(i, j)/N - length(i, j)) for all i < j,
290 // undershoot = min(countOpen(i, j)/N - length(i, j)) for all i < j,
291 // N = len(samples),
292 // countClosed(i, j) is the number of points between i and j
293 // including ends,
294 // countOpen(i, j) is the number of points between i and j excluding ends,
295 // length(i, j) is locations[j] - locations[i].
296
297 // The following algorithm is modification of Kadane's algorithm,
298 // see https://en.wikipedia.org/wiki/Maximum_subarray_problem.
299
300 // The maximum of (countClosed(k, i-1)/N - length(k, i-1)) for any k < i-1.
301 var maxDiff = 0;
302 // The minimum of (countOpen(k, i-1)/N - length(k, i-1)) for any k < i-1.
303 var minDiff = 0;
304 for (var i = 1; i < locations.length; i++) {
305 var length = locations[i] - locations[i - 1];
306 var countClosed = countLessEqual[i] - countLess[i - 1];
307 var countOpen = countLess[i] - countLessEqual[i - 1];
308 // Number of points that are added if we extend a closed range that
309 // ends at location (i-1).
310 var countClosedIncrement =
311 countLessEqual[i] - countLessEqual[i - 1];
312 // Number of points that are added if we extend an open range that
313 // ends at location (i-1).
314 var countOpenIncrement = countLess[i] - countLess[i - 1];
315
316 // Either extend the previous optimal range or start a new one.
317 maxDiff = Math.max(
318 countClosedIncrement * invSampleCount - length + maxDiff,
319 countClosed * invSampleCount - length);
320 minDiff = Math.min(
321 countOpenIncrement * invSampleCount - length + minDiff,
322 countOpen * invSampleCount - length);
323
324 maxLocalDiscrepancy = Math.max(
325 maxDiff, -minDiff, maxLocalDiscrepancy);
326 }
327 return maxLocalDiscrepancy;
328 };
329
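// Usage sketch (illustrative values only, not part of the reviewed patch):
// equally spaced samples achieve the best-case discrepancy of 1/N, while
// samples clustered at a single point give the worst case of 1.
Statistics.discrepancy([1 / 6, 1 / 2, 5 / 6]);  // about 1/3 (1/N)
Statistics.discrepancy([0.5, 0.5, 0.5]);        // about 1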
330 /**
331 * A discrepancy based metric for measuring timestamp jank.
332 *
333 * timestampsDiscrepancy quantifies the largest area of jank observed in a
334 * series of timestamps. Note that this is different from metrics based on
335 * the max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6]
336 * and B = [0,1,2,3,5,7] have the same max_time_interval = 2, but
337 * Discrepancy(B) > Discrepancy(A).
338 *
339 * Two variants of discrepancy can be computed:
340 *
341 * Relative discrepancy follows the original definition of
342 * discrepancy. It characterizes the largest area of jank, relative to the
343 * duration of the entire time stamp series. We normalize the raw results,
344 * because the best case discrepancy for a set of N samples is 1/N (for
345 * equally spaced samples), and we want our metric to report 0.0 in that
346 * case.
347 *
348 * Absolute discrepancy also characterizes the largest area of jank, but its
349 * value wouldn't change (except for imprecisions due to a low
350 * |interval_multiplier|) if additional 'good' intervals were added to an
351 * existing list of time stamps. Its range is [0,inf] and the unit is
352 * milliseconds.
353 *
354 * The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same
355 * absolute discrepancy, but D has lower relative discrepancy than C.
356 *
357 * |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each
358 * S_i is a time stamp series. In that case, the discrepancy D(S) is:
359 * D(S) = max(D(S_1), D(S_2), ..., D(S_N))
360 **/
361 Statistics.timestampsDiscrepancy = function(timestamps, opt_absolute,
362 opt_locationCount) {
363 if (timestamps.length === 0)
364 return 0.0;
365
366 if (opt_absolute === undefined)
367 opt_absolute = true;
368
369 if (Array.isArray(timestamps[0])) {
370 var rangeDiscrepancies = timestamps.map(function(r) {
371 return Statistics.timestampsDiscrepancy(r, opt_absolute, opt_locationCount);
372 });
373 return Math.max.apply(null, rangeDiscrepancies);
374 }
375
376 var s = Statistics.normalizeSamples(timestamps);
377 var samples = s.normalized_samples;
378 var sampleScale = s.scale;
379 var discrepancy = Statistics.discrepancy(samples, opt_locationCount);
380 var invSampleCount = 1.0 / samples.length;
381 if (opt_absolute === true) {
382 // Compute absolute discrepancy
383 discrepancy /= sampleScale;
384 } else {
385 // Compute relative discrepancy
386 discrepancy = tr.b.clamp(
387 (discrepancy - invSampleCount) / (1.0 - invSampleCount), 0.0, 1.0);
388 }
389 return discrepancy;
390 };
391
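// Usage sketch (illustrative values only, not part of the reviewed patch):
// the doc comment's example series both have a maximum gap of 2, but the
// discrepancy metric ranks B as jankier than A.
var sketchJankA = Statistics.timestampsDiscrepancy([0, 1, 2, 3, 5, 6]);
var sketchJankB = Statistics.timestampsDiscrepancy([0, 1, 2, 3, 5, 7]);
// sketchJankB > sketchJankA; pass false as the second argument for the
// relative (normalized, [0, 1]) variant instead of absolute milliseconds.
Statistics.timestampsDiscrepancy([0, 1, 2, 3, 5, 7], false);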
392 /**
393 * A discrepancy based metric for measuring duration jank.
394 *
395 * DurationsDiscrepancy computes a jank metric which measures how irregular a
396 * given sequence of intervals is. In order to minimize jank, each duration
397 * should be equally long. This is similar to how timestamp jank works,
398 * and we therefore reuse the timestamp discrepancy function above to compute
399 * a similar duration discrepancy number.
400 *
401 * Because timestamp discrepancy is defined in terms of timestamps, we first
402 * convert the list of durations to monotonically increasing timestamps.
403 *
404 * Args:
405 * durations: List of interval lengths in milliseconds.
406 * absolute: See TimestampsDiscrepancy.
407 * opt_locationCount: See TimestampsDiscrepancy.
408 **/
409 Statistics.durationsDiscrepancy = function(
410 durations, opt_absolute, opt_locationCount) {
411 if (durations.length === 0)
412 return 0.0;
413
414 var timestamps = durations.reduce(function(prev, curr, index, array) {
415 prev.push(prev[prev.length - 1] + curr);
416 return prev;
417 }, [0]);
418 return Statistics.timestampsDiscrepancy(
419 timestamps, opt_absolute, opt_locationCount);
420 };
421
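// Usage sketch (illustrative values only, not part of the reviewed patch):
// durations are first turned into cumulative timestamps starting at 0, e.g.
// [2, 2, 2, 6] becomes [0, 2, 4, 6, 12], then scored as timestamp jank.
Statistics.durationsDiscrepancy([2, 2, 2, 2]);  // evenly spaced: best case
Statistics.durationsDiscrepancy([2, 2, 2, 6]);  // one long frame: scores higher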
422 /**
423 * Modifies |samples| in-place to reduce its length down to |count|.
424 *
425 * @param {!Array} samples
426 * @param {number} count
427 * @return {!Array}
428 */
429 Statistics.uniformlySampleArray = function(samples, count) {
430 if (samples.length <= count) {
431 return samples;
432 }
433 while (samples.length > count) {
434 var i = Math.floor(Math.random() * samples.length);
435 samples.splice(i, 1);
436 }
437 return samples;
438 };
439
440 /**
441 * A mechanism to uniformly sample elements from an arbitrary long stream.
442 *
443 * Call this method every time a new element is obtained from the stream,
444 * passing always the same |samples| array and the |numSamples| you desire.
445 * Also pass in the current |streamLength|, which is the same as the index of
446 * |newElement| within that stream.
447 *
448 * The |samples| array will possibly be updated, replacing one of its
449 * elements with |newElement|. The length of |samples| will not be more than
450 * |numSamples|.
451 *
452 * This method guarantees that after |streamLength| elements have been
453 * processed each one has equal probability of being in |samples|. The order
454 * of samples is not preserved though.
455 *
456 * Args:
457 * samples: Array of elements that have already been selected. Start with [].
458 * streamLength: The current length of the stream, up to |newElement|.
459 * newElement: The element that was just extracted from the stream.
460 * numSamples: The total number of samples desired.
461 **/
462 Statistics.uniformlySampleStream = function(samples, streamLength, newElement,
463 numSamples) {
464 if (streamLength <= numSamples) {
465 if (samples.length >= streamLength)
466 samples[streamLength - 1] = newElement;
467 else
468 samples.push(newElement);
469 return;
470 }
471
472 var probToKeep = numSamples / streamLength;
473 if (Math.random() > probToKeep)
474 return; // New sample was rejected.
475
476 // Keeping it; replace an element at random.
477 var index = Math.floor(Math.random() * numSamples);
478 samples[index] = newElement;
479 };
480
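// Usage sketch (illustrative values only, not part of the reviewed patch):
// reservoir-style sampling that keeps at most 3 of the elements seen so far,
// each with equal probability; streamLength is the 1-based element position.
var sketchReservoir = [];
[10, 20, 30, 40, 50, 60].forEach(function(element, index) {
  Statistics.uniformlySampleStream(sketchReservoir, index + 1, element, 3);
});
// sketchReservoir now holds 3 elements, each input equally likely to survive.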
481 /**
482 * A mechanism to merge two arrays of uniformly sampled elements in a way that
483 * ensures elements in the final array are still sampled uniformly.
484 *
485 * This works similarly to uniformlySampleStream. The |samplesA| array will
486 * be updated, with some of its elements replaced by elements from
487 * |samplesB| in a way that ensures that elements remain sampled uniformly.
488 *
489 * Args:
490 * samplesA: Array of uniformly sampled elements, will be updated.
491 * streamLengthA: The length of the stream from which |samplesA| was sampled.
492 * samplesB: Other array of uniformly sampled elements, will NOT be updated.
493 * streamLengthB: The length of the stream from which |samplesB| was sampled.
494 * numSamples: The total number of samples desired, both in |samplesA| and
495 * |samplesB|.
496 **/
497 Statistics.mergeSampledStreams = function(
498 samplesA, streamLengthA,
499 samplesB, streamLengthB, numSamples) {
500 if (streamLengthB < numSamples) {
501 // samplesB has not reached max capacity, so every sample of stream B was
502 // chosen with certainty. Add them one by one into samplesA.
503 var nbElements = Math.min(streamLengthB, samplesB.length);
504 for (var i = 0; i < nbElements; ++i) {
505 Statistics.uniformlySampleStream(samplesA, streamLengthA + i + 1,
506 samplesB[i], numSamples);
507 }
508 return;
509 }
510 if (streamLengthA < numSamples) {
511 // samplesA has not reached max capacity, so every sample of stream A was
512 // chosen with certainty. Add them one by one into samplesB.
513 var nbElements = Math.min(streamLengthA, samplesA.length);
514 var tempSamples = samplesB.slice();
515 for (var i = 0; i < nbElements; ++i) {
516 Statistics.uniformlySampleStream(tempSamples, streamLengthB + i + 1,
517 samplesA[i], numSamples);
518 }
519 // Copy that back into the first vector.
520 for (var i = 0; i < tempSamples.length; ++i) {
521 samplesA[i] = tempSamples[i];
522 }
523 return;
524 }
525
526 // Both sample arrays are at max capacity, use the power of maths!
527 // Elements in samplesA have been selected with probability
528 // numSamples / streamLengthA. Same for samplesB. For each index of the
529 // array we keep samplesA[i] with probability
530 // P = streamLengthA / (streamLengthA + streamLengthB)
531 // and replace it with samplesB[i] with probability 1-P.
532 // The total probability of keeping it is therefore
533 // numSamples / streamLengthA *
534 // streamLengthA / (streamLengthA + streamLengthB)
535 // = numSamples / (streamLengthA + streamLengthB)
536 // A similar computation shows we have the same probability of keeping any
537 // element in samplesB. Magic!
538 var nbElements = Math.min(numSamples, samplesB.length);
539 var probOfSwapping = streamLengthB / (streamLengthA + streamLengthB);
540 for (var i = 0; i < nbElements; ++i) {
541 if (Math.random() < probOfSwapping) {
542 samplesA[i] = samplesB[i];
543 }
544 }
545 };
546
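// Usage sketch (illustrative values only, not part of the reviewed patch):
// merging two reservoirs sampled from different streams keeps the result
// uniformly sampled over the combined stream of 150 elements.
var sketchSamplesA = [1, 2, 3];  // uniformly sampled from a stream of 100
var sketchSamplesB = [4, 5, 6];  // uniformly sampled from a stream of 50
Statistics.mergeSampledStreams(sketchSamplesA, 100, sketchSamplesB, 50, 3);
// sketchSamplesA now holds 3 elements drawn uniformly from all 150.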
547 /* Continuous distributions are defined by probability density functions.
548 *
549 * Random variables are referred to by capital letters: X, Y, Z.
550 * Particular values from these distributions are referred to by lowercase
551 * letters like |x|.
552 * The probability that |X| ever exactly equals |x| is P(X==x) = 0.
553 *
554 * For a discrete probability distribution, see tr.v.Histogram.
555 */
556 function Distribution() {
557 }
558
559 Distribution.prototype = {
560 /* The probability density of the random variable at value |x| is the
561 * relative likelihood for this random variable to take on the given value
562 * |x|.
563 *
564 * @param {number} x A value from the random distribution.
565 * @return {number} probability density at x.
566 */
567 computeDensity: function(x) {
568 throw Error('Not implemented');
569 },
570
571 /* A percentile is the probability that a sample from the distribution is
572 * less than the given value |x|. This function is monotonically increasing.
573 *
574 * @param {number} x A value from the random distribution.
575 * @return {number} P(X<x).
576 */
577 computePercentile: function(x) {
578 throw Error('Not implemented');
579 },
580
581 /* A complementary percentile is the probability that a sample from the
582 * distribution is greater than the given value |x|. This function is
583 * monotonically decreasing.
584 *
585 * @param {number} x A value from the random distribution.
586 * @return {number} P(X>x).
587 */
588 computeComplementaryPercentile: function(x) {
589 return 1 - this.computePercentile(x);
590 },
591
592 /* Compute the mean of the probability distribution.
593 *
594 * @return {number} mean.
595 */
596 get mean() {
597 throw Error('Not implemented');
598 },
599
600 /* The mode of a distribution is the most likely value.
601 * The maximum of the computeDensity() function is at this mode.
602 * @return {number} mode.
603 */
604 get mode() {
605 throw Error('Not implemented');
606 },
607
608 /* The median is the center value of the distribution.
609 * computePercentile(median) = computeComplementaryPercentile(median) = 0.5
610 *
611 * @return {number} median.
612 */
613 get median() {
614 throw Error('Not implemented');
615 },
616
617 /* The standard deviation is a measure of how dispersed or spread out the
618 * distribution is (this statistic has the same units as the values).
619 *
620 * @return {number} standard deviation.
621 */
622 get standardDeviation() {
623 throw Error('Not implemented');
624 },
625
626 /* An alternative measure of how spread out the distribution is,
627 * the variance is the square of the standard deviation.
628 * @return {number} variance.
629 */
630 get variance() {
631 throw Error('Not implemented');
632 }
633 };
634
635 Statistics.UniformDistribution = function(opt_range) {
636 if (!opt_range)
637 opt_range = tr.b.Range.fromExplicitRange(0, 1);
638 this.range = opt_range;
639 };
640
641 Statistics.UniformDistribution.prototype = {
642 __proto__: Distribution.prototype,
643
644 computeDensity: function(x) {
645 return 1 / this.range.range;
646 },
647
648 computePercentile: function(x) {
649 return tr.b.normalize(x, this.range.min, this.range.max);
650 },
651
652 get mean() {
653 return this.range.center;
654 },
655
656 get mode() {
657 return undefined;
658 },
659
660 get median() {
661 return this.mean;
662 },
663
664 get standardDeviation() {
665 return Math.sqrt(this.variance);
666 },
667
668 get variance() {
669 return Math.pow(this.range.range, 2) / 12;
670 }
671 };
672
673 /* The Normal or Gaussian distribution, or bell curve, is common in complex
674 * processes such as those found in many of the natural sciences. If Z is the
675 * standard normal distribution with mean = 0 and variance = 1, then the
676 * general normal distribution is Y = mean + Z*sqrt(variance).
677 * https://www.desmos.com/calculator/tqtbjm4s3z
678 */
679 Statistics.NormalDistribution = function(opt_mean, opt_variance) {
680 this.mean_ = opt_mean || 0;
681 this.variance_ = opt_variance || 1;
682 this.standardDeviation_ = Math.sqrt(this.variance_);
683 };
684
685 Statistics.NormalDistribution.prototype = {
686 __proto__: Distribution.prototype,
687
688 computeDensity: function(x) {
689 var scale = (1.0 / (this.standardDeviation * Math.sqrt(2.0 * Math.PI)));
690 var exponent = -Math.pow(x - this.mean, 2) / (2.0 * this.variance);
691 return scale * Math.exp(exponent);
692 },
693
694 computePercentile: function(x) {
695 var standardizedX = ((x - this.mean) /
696 Math.sqrt(2.0 * this.variance));
697 return (1.0 + tr.b.erf(standardizedX)) / 2.0;
698 },
699
700 get mean() {
701 return this.mean_;
702 },
703
704 get median() {
705 return this.mean;
706 },
707
708 get mode() {
709 return this.mean;
710 },
711
712 get standardDeviation() {
713 return this.standardDeviation_;
714 },
715
716 get variance() {
717 return this.variance_;
718 }
719 };
720
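// Usage sketch (illustrative values only, not part of the reviewed patch):
// the default NormalDistribution is the standard normal (mean 0, variance 1).
var sketchZ = new Statistics.NormalDistribution();
sketchZ.computePercentile(0);  // 0.5
sketchZ.computePercentile(1);  // about 0.841 (one standard deviation above mean)
sketchZ.computeDensity(0);     // about 0.3989, i.e. 1 / sqrt(2 * PI)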
721 /* The log-normal distribution is a continuous probability distribution of a
722 * random variable whose logarithm is normally distributed.
723 * If Y is the general normal distribution, then X = exp(Y) is the general
724 * log-normal distribution.
725 * X will have different parameters from Y,
726 * so the mean of Y is called the "location" of X,
727 * and the standard deviation of Y is called the "shape" of X.
728 * The standard lognormal distribution exp(Z) has location = 0 and shape = 1.
729 * https://www.desmos.com/calculator/tqtbjm4s3z
730 */
731 Statistics.LogNormalDistribution = function(opt_location, opt_shape) {
732 this.normalDistribution_ = new Statistics.NormalDistribution(
733 opt_location, Math.pow(opt_shape || 1, 2));
734 };
735
736 Statistics.LogNormalDistribution.prototype = {
737 __proto__: Statistics.NormalDistribution.prototype,
738
739 computeDensity: function(x) {
740 return this.normalDistribution_.computeDensity(Math.log(x)) / x;
741 },
742
743 computePercentile: function(x) {
744 return this.normalDistribution_.computePercentile(Math.log(x));
745 },
746
747 get mean() {
748 return Math.exp(this.normalDistribution_.mean +
749 (this.normalDistribution_.variance / 2));
750 },
751
752 get variance() {
753 var nm = this.normalDistribution_.mean;
754 var nv = this.normalDistribution_.variance;
755 return (Math.exp(2 * (nm + nv)) -
756 Math.exp(2 * nm + nv));
757 },
758
759 get standardDeviation() {
760 return Math.sqrt(this.variance);
761 },
762
763 get median() {
764 return Math.exp(this.normalDistribution_.mean);
765 },
766
767 get mode() {
768 return Math.exp(this.normalDistribution_.mean -
769 this.normalDistribution_.variance);
770 }
771 };
772
773 /**
774 * Instead of describing a LogNormalDistribution in terms of its "location"
775 * and "shape", it can also be described in terms of its median
776 * and the point at which its complementary cumulative distribution
777 * function bends between the linear-ish region in the middle and the
778 * exponential-ish region. When the distribution is used to compute
779 * percentiles for log-normal random processes such as latency, the score
780 * hits a point of diminishing returns as the latency improves, past which
781 * it becomes relatively difficult to improve the score further. This point of
782 * diminishing returns is the first x-intercept of the third derivative of the
783 * CDF, which is the second derivative of the PDF.
784 *
785 * https://www.desmos.com/calculator/cg5rnftabn
786 *
787 * @param {number} median The median of the distribution.
788 * @param {number} diminishingReturns The point of diminishing returns.
789 * @return {LogNormalDistribution}
790 */
791 Statistics.LogNormalDistribution.fromMedianAndDiminishingReturns =
792 function(median, diminishingReturns) {
793 diminishingReturns = Math.log(diminishingReturns / median);
794 var shape = Math.sqrt(1 - 3 * diminishingReturns -
795 Math.sqrt(Math.pow(diminishingReturns - 3, 2) - 8)) / 2;
796 var location = Math.log(median);
797 return new Statistics.LogNormalDistribution(location, shape);
798 };
799
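// Usage sketch (illustrative values only, not part of the reviewed patch):
// building a scoring curve from a median of 1000 and a point of diminishing
// returns at 300 (numbers chosen purely for illustration).
var sketchCurve =
    Statistics.LogNormalDistribution.fromMedianAndDiminishingReturns(1000, 300);
sketchCurve.computePercentile(1000);  // 0.5 by construction (the median)
sketchCurve.computePercentile(300);   // well below 0.5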
800 // p-values less than this indicate statistical significance.
801 Statistics.DEFAULT_ALPHA = 0.01;
802
803 // If a statistically significant difference has not been established with
804 // this many observations per sample, we'll assume none exists.
805 Statistics.MAX_SUGGESTED_SAMPLE_SIZE = 20;
806
807 /** @enum */
808 Statistics.Significance = {
809 SIGNIFICANT: 'REJECT',
810 INSIGNIFICANT: 'FAIL_TO_REJECT',
811 NEED_MORE_DATA: 'NEED_MORE_DATA',
812 DONT_CARE: 'DONT_CARE',
813 };
814
815
816 /**
817 * @typedef {Object} HypothesisTestResult
818 * @property {number} p
819 * @property {number} U
820 * @property {!tr.b.Statistics.Significance} significance
821 */
822
823 /**
824 * @param {!Array.<number>} a
825 * @param {!Array.<number>} b
826 * @param {number=} opt_alpha
827 * @param {number=} opt_reqSampleSize
828 * @return {!HypothesisTestResult}
829 */
830 Statistics.mwu = function(a, b, opt_alpha, opt_reqSampleSize) {
831 var result = mannwhitneyu.test(a, b);
832 var alpha = opt_alpha || Statistics.DEFAULT_ALPHA;
833 if (result.p < alpha) {
834 result.significance = Statistics.Significance.SIGNIFICANT;
835 } else if (opt_reqSampleSize && (a.length < opt_reqSampleSize ||
836 b.length < opt_reqSampleSize)) {
837 result.significance = Statistics.Significance.NEED_MORE_DATA;
838 } else {
839 result.significance = Statistics.Significance.INSIGNIFICANT;
840 }
841 return result;
842 };
843
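// Usage sketch (illustrative values only, not part of the reviewed patch):
// Mann-Whitney U test between two groups of observations; p and U come from
// mannwhitneyu.test(), and significance is derived from alpha and the
// optional required sample size.
var sketchResult = Statistics.mwu([10, 11, 12, 13, 14], [12, 13, 14, 15, 16]);
// sketchResult.p, sketchResult.U, and sketchResult.significance, which is
// SIGNIFICANT when p < alpha, otherwise NEED_MORE_DATA when either sample is
// smaller than opt_reqSampleSize, and INSIGNIFICANT otherwise.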
844 return {
845 Statistics,
846 };
847 });
848 </script>