| OLD | NEW |
| (Empty) |
| 1 <!DOCTYPE html> | |
| 2 <!-- | |
| 3 Copyright 2014 The Chromium Authors. All rights reserved. | |
| 4 Use of this source code is governed by a BSD-style license that can be | |
| 5 found in the LICENSE file. | |
| 6 --> | |
| 7 | |
| 8 <link rel="import" href="/tracing/base/math.html"> | |
| 9 <link rel="import" href="/tracing/base/range.html"> | |
| 10 <script src="/mannwhitneyu/mannwhitneyu.js"></script> | |
| 11 | |
| 12 <script> | |
| 13 'use strict'; | |
| 14 | |
| 15 // In node, the script-src for mannwhitneyu above brings in mannwhitneyui | |
| 16 // into a module, instead of into the global scope. Whereas this file | |
| 17 // assumes that mannwhitneyu is in the global scope. So, in Node only, we | |
| 18 // require() it in, and then take all its exports and shove them into the | |
| 19 // global scope by hand. | |
// Node-only shim: copy mannwhitneyu's module exports onto the global object
// so the code below can use the same bare `mannwhitneyu` global it gets in
// the browser.
(function(global) {
  if (tr.isNode) {
    // Resolve the import href to a filesystem path so require() can load it.
    // NOTE(review): the href here ('/mannwhitneyu.js') differs from the
    // <script> src above ('/mannwhitneyu/mannwhitneyu.js') — presumably the
    // loader maps the shorter path; verify against HTMLImportsLoader.
    var mwuAbsPath = HTMLImportsLoader.hrefToAbsolutePath(
        '/mannwhitneyu.js');
    var mwuModule = require(mwuAbsPath);
    // Shove every export into the global scope by hand.
    for (var exportName in mwuModule) {
      global[exportName] = mwuModule[exportName];
    }
  }
})(this);
| 30 </script> | |
| 31 | |
| 32 <script> | |
| 33 'use strict'; | |
| 34 | |
| 35 // TODO(charliea): Remove: | |
| 36 /* eslint-disable catapult-camelcase */ | |
| 37 | |
| 38 tr.exportTo('tr.b', function() { | |
| 39 var identity = x => x; | |
| 40 | |
| 41 var Statistics = {}; | |
| 42 | |
/**
 * Divides |numerator| by |denominator|, treating a zero denominator as a
 * zero result instead of producing Infinity or NaN.
 * @param {number} numerator
 * @param {number} denominator
 * @return {number} The quotient, or 0 when |denominator| is 0.
 */
Statistics.divideIfPossibleOrZero = function(numerator, denominator) {
  return denominator === 0 ? 0 : numerator / denominator;
};
| 49 | |
/**
 * Sums func(element, index) over |ary| (any iterable).
 * With no callback, sums the elements themselves.
 */
Statistics.sum = function(ary, opt_func, opt_this) {
  const func = opt_func || identity;
  let total = 0;
  let index = 0;
  for (const elt of ary) {
    total += func.call(opt_this, elt, index++);
  }
  return total;
};
| 58 | |
/**
 * Arithmetic mean of func(element, index) over |ary|.
 * @return {number|undefined} undefined for an empty iterable.
 */
Statistics.mean = function(ary, opt_func, opt_this) {
  const func = opt_func || identity;
  let total = 0;
  let count = 0;

  for (const elt of ary) {
    total += func.call(opt_this, elt, count++);
  }

  // An empty input has no mean.
  if (count === 0) return undefined;

  return total / count;
};
| 72 | |
/**
 * Geometric mean of func(element, index) over |ary|.
 * Returns 0 if any value is non-positive, and 1 for an empty iterable.
 */
Statistics.geometricMean = function(ary, opt_func, opt_this) {
  const func = opt_func || identity;
  let count = 0;
  let sumOfLogs = 0;

  // Computed as exp(mean(log(x))) rather than the N-th root of the product
  // so that large inputs cannot overflow.
  for (const elt of ary) {
    const value = func.call(opt_this, elt, count++);
    // Any non-positive value makes the geometric mean zero by convention.
    if (value <= 0) return 0;
    sumOfLogs += Math.log(value);
  }

  // Empty product convention: the geometric mean of nothing is 1.
  if (count === 0) return 1;

  return Math.exp(sumOfLogs / count);
};
| 92 | |
/**
 * Weighted mean of |ary|. Elements whose value callback yields undefined are
 * skipped entirely (they contribute neither value nor weight).
 * @return {number|undefined} undefined when the total weight is zero.
 */
Statistics.weightedMean = function(
    ary, weightCallback, opt_valueCallback, opt_this) {
  const valueCallback = opt_valueCallback || identity;
  let weightedSum = 0;
  let totalWeight = 0;
  let index = -1;

  for (const elt of ary) {
    index++;
    const value = valueCallback.call(opt_this, elt, index);
    if (value === undefined) continue;
    const weight = weightCallback.call(opt_this, elt, index, value);
    weightedSum += weight * value;
    totalWeight += weight;
  }

  return totalWeight === 0 ? undefined : weightedSum / totalWeight;
};
| 116 | |
/**
 * Sample variance (N - 1 denominator) of func(element, index) over |ary|.
 * @return {number|undefined} undefined for empty input, 0 for one element.
 */
Statistics.variance = function(ary, opt_func, opt_this) {
  if (ary.length === 0) return undefined;
  if (ary.length === 1) return 0;
  const func = opt_func || identity;
  const mean = Statistics.mean(ary, func, opt_this);
  // Plain function expression (not an arrow) so .call() can rebind |this|.
  const squaredDeviationSum = Statistics.sum(
      ary,
      function(d, i) {
        const deviation = func.call(this, d, i) - mean;
        return deviation * deviation;
      },
      opt_this);
  return squaredDeviationSum / (ary.length - 1);
};
| 133 | |
/**
 * Sample standard deviation: the square root of Statistics.variance.
 * @return {number|undefined} undefined for empty input.
 */
Statistics.stddev = function(ary, opt_func, opt_this) {
  if (ary.length === 0) return undefined;
  return Math.sqrt(Statistics.variance(ary, opt_func, opt_this));
};
| 140 | |
/**
 * Maximum of func(element, index) over |ary|; -Infinity for empty input.
 */
Statistics.max = function(ary, opt_func, opt_this) {
  const func = opt_func || identity;
  let best = -Infinity;
  let index = 0;
  for (const elt of ary) {
    best = Math.max(best, func.call(opt_this, elt, index++));
  }
  return best;
};
| 149 | |
/**
 * Minimum of func(element, index) over |ary|; Infinity for empty input.
 */
Statistics.min = function(ary, opt_func, opt_this) {
  const func = opt_func || identity;
  let best = Infinity;
  let index = 0;
  for (const elt of ary) {
    best = Math.min(best, func.call(opt_this, elt, index++));
  }
  return best;
};
| 158 | |
/**
 * Builds a tr.b.Range spanning func(element, index) over |ary|.
 * @return {!tr.b.Range}
 */
Statistics.range = function(ary, opt_func, opt_this) {
  const func = opt_func || identity;
  const result = new tr.b.Range();
  let index = 0;
  for (const elt of ary) {
    result.addValue(func.call(opt_this, elt, index++));
  }
  return result;
};
| 167 | |
/**
 * Value at the given fraction of the sorted mapped samples
 * (nearest-rank, rounding the index down).
 * @param {number} percent Must lie in [0, 1].
 * @throws {Error} if |percent| is outside [0, 1].
 */
Statistics.percentile = function(ary, percent, opt_func, opt_this) {
  if (!(percent >= 0 && percent <= 1))
    throw new Error('percent must be [0,1]');

  const func = opt_func || identity;
  const values = [];
  let index = 0;
  for (const elt of ary) {
    values.push(func.call(opt_this, elt, index++));
  }
  // Numeric comparator: the default sort is lexicographic.
  values.sort((a, b) => a - b);
  return values[Math.floor((ary.length - 1) * percent)];
};
| 181 | |
| 182 /** | |
| 183 * Sorts the samples, and map them linearly to the range [0,1]. | |
| 184 * | |
| 185 * They're mapped such that for the N samples, the first sample is 0.5/N and | |
| 186 * the last sample is (N-0.5)/N. | |
| 187 * | |
| 188 * Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is | |
| 189 * 2/N, twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In | |
| 190 * our case we don't want to distinguish between these two cases, as our | |
| 191 * original domain is not bounded (it is for Monte Carlo integration, where | |
| 192 * discrepancy was first used). | |
| 193 **/ | |
Statistics.normalizeSamples = function(samples) {
  if (samples.length === 0) {
    return {
      normalized_samples: samples,
      scale: 1.0
    };
  }
  // Sort a copy so the caller's array is left untouched.
  samples = samples.slice().sort((a, b) => a - b);
  const low = Math.min.apply(null, samples);
  const high = Math.max.apply(null, samples);
  const newLow = 0.5 / samples.length;
  const newHigh = (samples.length - 0.5) / samples.length;
  if (high - low === 0.0) {
    // Degenerate case: all samples identical. Map each one to 0.5.
    samples = samples.map(function() { return 0.5; });
    return {
      normalized_samples: samples,
      scale: 1.0
    };
  }
  const scale = (newHigh - newLow) / (high - low);
  for (let i = 0; i < samples.length; i++) {
    samples[i] = (samples[i] - low) * scale + newLow;
  }
  return {
    normalized_samples: samples,
    scale: scale
  };
};
| 229 | |
| 230 /** | |
| 231 * Computes the discrepancy of a set of 1D samples from the interval [0,1]. | |
| 232 * | |
| 233 * The samples must be sorted. We define the discrepancy of an empty set | |
| 234 * of samples to be zero. | |
| 235 * | |
| 236 * http://en.wikipedia.org/wiki/Low-discrepancy_sequence | |
| 237 * http://mathworld.wolfram.com/Discrepancy.html | |
| 238 */ | |
/**
 * Computes the discrepancy of sorted 1D |samples| from the interval [0,1].
 * The discrepancy of an empty sample set is defined to be zero.
 * @param {!Array.<number>} samples Sorted samples in [0,1].
 * @param {number=} opt_locationCount If given, evaluate at this many equally
 *     spaced locations instead of at the sample positions themselves.
 * @return {number}
 */
Statistics.discrepancy = function(samples, opt_locationCount) {
  if (samples.length === 0)
    return 0.0;

  var maxLocalDiscrepancy = 0;
  var invSampleCount = 1.0 / samples.length;
  var locations = [];
  // For each location, stores the number of samples less than that location.
  var countLess = [];
  // For each location, stores the number of samples less than or equal to
  // that location.
  var countLessEqual = [];

  if (opt_locationCount !== undefined) {
    // Generate list of equally spaced locations.
    var sampleIndex = 0;
    for (var i = 0; i < opt_locationCount; i++) {
      var location = i / (opt_locationCount - 1);
      locations.push(location);
      while (sampleIndex < samples.length &&
             samples[sampleIndex] < location) {
        sampleIndex += 1;
      }
      countLess.push(sampleIndex);
      while (sampleIndex < samples.length &&
             samples[sampleIndex] <= location) {
        sampleIndex += 1;
      }
      countLessEqual.push(sampleIndex);
    }
  } else {
    // Populate locations with sample positions. Append 0 and 1 if necessary.
    if (samples[0] > 0.0) {
      locations.push(0.0);
      countLess.push(0);
      countLessEqual.push(0);
    }
    for (var i = 0; i < samples.length; i++) {
      locations.push(samples[i]);
      countLess.push(i);
      countLessEqual.push(i + 1);
    }
    // BUG FIX: this previously read samples[-1], which in JavaScript is
    // always undefined (no Python-style negative indexing), so the trailing
    // 1.0 location was never appended. Compare against the last element.
    if (samples[samples.length - 1] < 1.0) {
      locations.push(1.0);
      countLess.push(samples.length);
      countLessEqual.push(samples.length);
    }
  }

  // Compute discrepancy as max(overshoot, -undershoot), where
  // overshoot = max(countClosed(i, j)/N - length(i, j)) for all i < j,
  // undershoot = min(countOpen(i, j)/N - length(i, j)) for all i < j,
  // N = len(samples),
  // countClosed(i, j) is the number of points between i and j
  // including ends,
  // countOpen(i, j) is the number of points between i and j excluding ends,
  // length(i, j) is locations[i] - locations[j].

  // The following algorithm is modification of Kadane's algorithm,
  // see https://en.wikipedia.org/wiki/Maximum_subarray_problem.

  // The maximum of (countClosed(k, i-1)/N - length(k, i-1)) for any k < i-1.
  var maxDiff = 0;
  // The minimum of (countOpen(k, i-1)/N - length(k, i-1)) for any k < i-1.
  var minDiff = 0;
  for (var i = 1; i < locations.length; i++) {
    var length = locations[i] - locations[i - 1];
    var countClosed = countLessEqual[i] - countLess[i - 1];
    var countOpen = countLess[i] - countLessEqual[i - 1];
    // Number of points that are added if we extend a closed range that
    // ends at location (i-1).
    var countClosedIncrement =
        countLessEqual[i] - countLessEqual[i - 1];
    // Number of points that are added if we extend an open range that
    // ends at location (i-1).
    var countOpenIncrement = countLess[i] - countLess[i - 1];

    // Either extend the previous optimal range or start a new one.
    maxDiff = Math.max(
        countClosedIncrement * invSampleCount - length + maxDiff,
        countClosed * invSampleCount - length);
    minDiff = Math.min(
        countOpenIncrement * invSampleCount - length + minDiff,
        countOpen * invSampleCount - length);

    maxLocalDiscrepancy = Math.max(
        maxDiff, -minDiff, maxLocalDiscrepancy);
  }
  return maxLocalDiscrepancy;
};
| 329 | |
| 330 /** | |
| 331 * A discrepancy based metric for measuring timestamp jank. | |
| 332 * | |
| 333 * timestampsDiscrepancy quantifies the largest area of jank observed in a | |
| 334 * series of timestamps. Note that this is different from metrics based on | |
| 335 * the max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] | |
| 336 * and B = [0,1,2,3,5,7] have the same max_time_interval = 2, but | |
| 337 * Discrepancy(B) > Discrepancy(A). | |
| 338 * | |
| 339 * Two variants of discrepancy can be computed: | |
| 340 * | |
| 341 * Relative discrepancy is following the original definition of | |
| 342 * discrepancy. It characterized the largest area of jank, relative to the | |
| 343 * duration of the entire time stamp series. We normalize the raw results, | |
| 344 * because the best case discrepancy for a set of N samples is 1/N (for | |
| 345 * equally spaced samples), and we want our metric to report 0.0 in that | |
| 346 * case. | |
| 347 * | |
| 348 * Absolute discrepancy also characterizes the largest area of jank, but its | |
| 349 * value wouldn't change (except for imprecisions due to a low | |
| 350 * |interval_multiplier|) if additional 'good' intervals were added to an | |
| 351 * exisiting list of time stamps. Its range is [0,inf] and the unit is | |
| 352 * milliseconds. | |
| 353 * | |
| 354 * The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same | |
| 355 * absolute discrepancy, but D has lower relative discrepancy than C. | |
| 356 * | |
| 357 * |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each | |
| 358 * S_i is a time stamp series. In that case, the discrepancy D(S) is: | |
| 359 * D(S) = max(D(S_1), D(S_2), ..., D(S_N)) | |
| 360 **/ | |
/**
 * Discrepancy-based jank metric for a series of timestamps (see the comment
 * block above for the definition of absolute vs. relative discrepancy).
 * @param {!Array} timestamps Either a flat array of timestamps, or an array
 *     of timestamp arrays (scored as the max over the sub-series).
 * @param {boolean=} opt_absolute Defaults to true (absolute discrepancy).
 * @param {number=} opt_locationCount See Statistics.discrepancy.
 * @return {number}
 */
Statistics.timestampsDiscrepancy = function(timestamps, opt_absolute,
                                            opt_locationCount) {
  if (timestamps.length === 0)
    return 0.0;

  if (opt_absolute === undefined)
    opt_absolute = true;

  if (Array.isArray(timestamps[0])) {
    // BUG FIX: the options were previously dropped in this recursive call,
    // so nested series were always scored with the defaults regardless of
    // what the caller asked for. Propagate them.
    var rangeDiscrepancies = timestamps.map(function(r) {
      return Statistics.timestampsDiscrepancy(
          r, opt_absolute, opt_locationCount);
    });
    return Math.max.apply(null, rangeDiscrepancies);
  }

  var s = Statistics.normalizeSamples(timestamps);
  var samples = s.normalized_samples;
  var sampleScale = s.scale;
  var discrepancy = Statistics.discrepancy(samples, opt_locationCount);
  var invSampleCount = 1.0 / samples.length;
  if (opt_absolute === true) {
    // Compute absolute discrepancy
    discrepancy /= sampleScale;
  } else {
    // Compute relative discrepancy, normalized so that the best case
    // (equally spaced samples, raw discrepancy 1/N) reports 0.
    discrepancy = tr.b.clamp(
        (discrepancy - invSampleCount) / (1.0 - invSampleCount), 0.0, 1.0);
  }
  return discrepancy;
};
| 391 | |
| 392 /** | |
| 393 * A discrepancy based metric for measuring duration jank. | |
| 394 * | |
| 395 * DurationsDiscrepancy computes a jank metric which measures how irregular a | |
| 396 * given sequence of intervals is. In order to minimize jank, each duration | |
| 397 * should be equally long. This is similar to how timestamp jank works, | |
| 398 * and we therefore reuse the timestamp discrepancy function above to compute | |
| 399 * a similar duration discrepancy number. | |
| 400 * | |
| 401 * Because timestamp discrepancy is defined in terms of timestamps, we first | |
| 402 * convert the list of durations to monotonically increasing timestamps. | |
| 403 * | |
| 404 * Args: | |
| 405 * durations: List of interval lengths in milliseconds. | |
| 406 * absolute: See TimestampsDiscrepancy. | |
| 407 * opt_locationCount: See TimestampsDiscrepancy. | |
| 408 **/ | |
Statistics.durationsDiscrepancy = function(
    durations, opt_absolute, opt_locationCount) {
  if (durations.length === 0)
    return 0.0;

  // Convert the interval lengths into monotonically increasing timestamps
  // (cumulative sums starting at 0), then reuse the timestamp metric.
  const timestamps = [0];
  for (const duration of durations) {
    timestamps.push(timestamps[timestamps.length - 1] + duration);
  }
  return Statistics.timestampsDiscrepancy(
      timestamps, opt_absolute, opt_locationCount);
};
| 421 | |
| 422 /** | |
| 423 * Modifies |samples| in-place to reduce its length down to |count|. | |
| 424 * | |
| 425 * @param {!Array} samples | |
| 426 * @param {number} count | |
| 427 * @return {!Array} | |
| 428 */ | |
/**
 * Modifies |samples| in-place to reduce its length down to |count| by
 * removing uniformly random elements.
 * @param {!Array} samples
 * @param {number} count
 * @return {!Array} The same (possibly shortened) |samples| array.
 */
Statistics.uniformlySampleArray = function(samples, count) {
  if (samples.length <= count) {
    return samples;
  }
  while (samples.length > count) {
    // FIX: use Math.floor rather than parseInt to truncate the random
    // index — parseInt stringifies its argument, which is slow and breaks
    // for values rendered in exponential notation.
    var i = Math.floor(Math.random() * samples.length);
    samples.splice(i, 1);
  }
  return samples;
};
| 439 | |
| 440 /** | |
| 441 * A mechanism to uniformly sample elements from an arbitrary long stream. | |
| 442 * | |
| 443 * Call this method every time a new element is obtained from the stream, | |
| 444 * passing always the same |samples| array and the |numSamples| you desire. | |
| 445 * Also pass in the current |streamLength|, which is the same as the index of | |
| 446 * |newElement| within that stream. | |
| 447 * | |
| 448 * The |samples| array will possibly be updated, replacing one of its element | |
| 449 * with |newElements|. The length of |samples| will not be more than | |
| 450 * |numSamples|. | |
| 451 * | |
| 452 * This method guarantees that after |streamLength| elements have been | |
| 453 * processed each one has equal probability of being in |samples|. The order | |
| 454 * of samples is not preserved though. | |
| 455 * | |
| 456 * Args: | |
| 457 * samples: Array of elements that have already been selected. Start with []. | |
| 458 * streamLength: The current length of the stream, up to |newElement|. | |
| 459 * newElement: The element that was just extracted from the stream. | |
| 460 * numSamples: The total number of samples desired. | |
| 461 **/ | |
Statistics.uniformlySampleStream = function(samples, streamLength, newElement,
                                            numSamples) {
  // While the reservoir is not yet full, every new element is kept.
  if (streamLength <= numSamples) {
    if (samples.length >= streamLength)
      samples[streamLength - 1] = newElement;
    else
      samples.push(newElement);
    return;
  }

  // Reservoir sampling: keep the new element with probability
  // numSamples / streamLength; otherwise reject it.
  const probToKeep = numSamples / streamLength;
  if (Math.random() > probToKeep)
    return;  // New sample was rejected.

  // Keeping it; replace an existing element chosen uniformly at random.
  const index = Math.floor(Math.random() * numSamples);
  samples[index] = newElement;
};
| 480 | |
| 481 /** | |
| 482 * A mechanism to merge two arrays of uniformly sampled elements in a way that | |
| 483 * ensures elements in the final array are still sampled uniformly. | |
| 484 * | |
| 485 * This works similarly to sampleStreamUniform. The |samplesA| array will be | |
| 486 * updated, some of its elements replaced by elements from |samplesB| in a | |
| 487 * way that ensure that elements will be sampled uniformly. | |
| 488 * | |
| 489 * Args: | |
| 490 * samplesA: Array of uniformly sampled elements, will be updated. | |
| 491 * streamLengthA: The length of the stream from which |samplesA| was sampled. | |
| 492 * samplesB: Other array of uniformly sampled elements, will NOT be updated. | |
| 493 * streamLengthB: The length of the stream from which |samplesB| was sampled. | |
| 494 * numSamples: The total number of samples desired, both in |samplesA| and | |
| 495 * |samplesB|. | |
| 496 **/ | |
Statistics.mergeSampledStreams = function(
    samplesA, streamLengthA,
    samplesB, streamLengthB, numSamples) {
  if (streamLengthB < numSamples) {
    // samplesB has not reached max capacity so every sample of stream B were
    // chosen with certainty. Add them one by one into samplesA.
    // Feeding each into uniformlySampleStream keeps samplesA uniform over
    // the combined stream of length streamLengthA + streamLengthB.
    var nbElements = Math.min(streamLengthB, samplesB.length);
    for (var i = 0; i < nbElements; ++i) {
      Statistics.uniformlySampleStream(samplesA, streamLengthA + i + 1,
          samplesB[i], numSamples);
    }
    return;
  }
  if (streamLengthA < numSamples) {
    // samplesA has not reached max capacity so every sample of stream A were
    // chosen with certainty. Add them one by one into samplesB.
    // Work on a copy of samplesB because only samplesA may be mutated.
    var nbElements = Math.min(streamLengthA, samplesA.length);
    var tempSamples = samplesB.slice();
    for (var i = 0; i < nbElements; ++i) {
      Statistics.uniformlySampleStream(tempSamples, streamLengthB + i + 1,
          samplesA[i], numSamples);
    }
    // Copy that back into the first vector.
    for (var i = 0; i < tempSamples.length; ++i) {
      samplesA[i] = tempSamples[i];
    }
    return;
  }

  // Both sample arrays are at max capacity, use the power of maths!
  // Elements in samplesA have been selected with probability
  // numSamples / streamLengthA. Same for samplesB. For each index of the
  // array we keep samplesA[i] with probability
  // P = streamLengthA / (streamLengthA + streamLengthB)
  // and replace it with samplesB[i] with probability 1-P.
  // The total probability of keeping it is therefore
  // numSamples / streamLengthA *
  //                        streamLengthA / (streamLengthA + streamLengthB)
  //                      = numSamples / (streamLengthA + streamLengthB)
  // A similar computation shows we have the same probability of keeping any
  // element in samplesB. Magic!
  var nbElements = Math.min(numSamples, samplesB.length);
  var probOfSwapping = streamLengthB / (streamLengthA + streamLengthB);
  for (var i = 0; i < nbElements; ++i) {
    if (Math.random() < probOfSwapping) {
      samplesA[i] = samplesB[i];
    }
  }
};
| 546 | |
| 547 /* Continuous distributions are defined by probability density functions. | |
| 548 * | |
| 549 * Random variables are referred to by capital letters: X, Y, Z. | |
| 550 * Particular values from these distributions are referred to by lowercase | |
| 551 * letters like |x|. | |
| 552 * The probability that |X| ever exactly equals |x| is P(X==x) = 0. | |
| 553 * | |
| 554 * For a discrete probability distribution, see tr.v.Histogram. | |
| 555 */ | |
// Abstract base for the continuous distributions below. Subclasses override
// every member; the base implementations simply throw.
function Distribution() {
}

Distribution.prototype = {
  /**
   * Relative likelihood for the random variable to take on the value |x|.
   * @param {number} x A value from the random distribution.
   * @return {number} probability density at x.
   */
  computeDensity: function(x) {
    throw new Error('Not implemented');
  },

  /**
   * Probability that a sample from the distribution is less than |x|.
   * Monotonically increasing in |x|.
   * @param {number} x A value from the random distribution.
   * @return {number} P(X<x).
   */
  computePercentile: function(x) {
    throw new Error('Not implemented');
  },

  /**
   * Probability that a sample from the distribution is greater than |x|.
   * Monotonically decreasing in |x|.
   * @param {number} x A value from the random distribution.
   * @return {number} P(X>x).
   */
  computeComplementaryPercentile: function(x) {
    return 1 - this.computePercentile(x);
  },

  /** @return {number} The mean of the distribution. */
  get mean() {
    throw new Error('Not implemented');
  },

  /**
   * The most likely value; computeDensity() attains its maximum here.
   * @return {number} mode.
   */
  get mode() {
    throw new Error('Not implemented');
  },

  /**
   * The center value: computePercentile(median) = 0.5.
   * @return {number} median.
   */
  get median() {
    throw new Error('Not implemented');
  },

  /**
   * How spread out the distribution is, in the same units as its values.
   * @return {number} standard deviation.
   */
  get standardDeviation() {
    throw new Error('Not implemented');
  },

  /**
   * The square of the standard deviation.
   * @return {number} variance.
   */
  get variance() {
    throw new Error('Not implemented');
  }
};
| 634 | |
// The uniform distribution over |opt_range|, defaulting to [0, 1].
Statistics.UniformDistribution = function(opt_range) {
  this.range = opt_range || tr.b.Range.fromExplicitRange(0, 1);
};

Statistics.UniformDistribution.prototype = {
  __proto__: Distribution.prototype,

  // Density is constant over the whole interval.
  computeDensity: function(x) {
    return 1 / this.range.range;
  },

  computePercentile: function(x) {
    return tr.b.normalize(x, this.range.min, this.range.max);
  },

  get mean() {
    return this.range.center;
  },

  // Every value is equally likely, so there is no single mode.
  get mode() {
    return undefined;
  },

  get median() {
    return this.mean;
  },

  get standardDeviation() {
    return Math.sqrt(this.variance);
  },

  // Variance of a uniform distribution over an interval of width w is
  // w^2 / 12.
  get variance() {
    return Math.pow(this.range.range, 2) / 12;
  }
};
| 672 | |
| 673 /* The Normal or Gaussian distribution, or bell curve, is common in complex | |
| 674 * processes such as are found in many of the natural sciences. If Z is the | |
| 675 * standard normal distribution with mean = 0 and variance = 1, then the | |
| 676 * general normal distribution is Y = mean + Z*sqrt(variance). | |
| 677 * https://www.desmos.com/calculator/tqtbjm4s3z | |
| 678 */ | |
// Normal (Gaussian) distribution with the given mean (default 0) and
// variance (default 1).
Statistics.NormalDistribution = function(opt_mean, opt_variance) {
  this.mean_ = opt_mean || 0;
  this.variance_ = opt_variance || 1;
  // Cache the standard deviation so computeDensity need not re-derive it.
  this.standardDeviation_ = Math.sqrt(this.variance_);
};

Statistics.NormalDistribution.prototype = {
  __proto__: Distribution.prototype,

  computeDensity: function(x) {
    const scale = 1.0 / (this.standardDeviation * Math.sqrt(2.0 * Math.PI));
    const exponent = -Math.pow(x - this.mean, 2) / (2.0 * this.variance);
    return scale * Math.exp(exponent);
  },

  // CDF expressed via the error function:
  // P(X<x) = (1 + erf((x - mean) / sqrt(2 * variance))) / 2.
  computePercentile: function(x) {
    const standardizedX = (x - this.mean) / Math.sqrt(2.0 * this.variance);
    return (1.0 + tr.b.erf(standardizedX)) / 2.0;
  },

  get mean() {
    return this.mean_;
  },

  // For a normal distribution, mean, median and mode coincide.
  get median() {
    return this.mean;
  },

  get mode() {
    return this.mean;
  },

  get standardDeviation() {
    return this.standardDeviation_;
  },

  get variance() {
    return this.variance_;
  }
};
| 720 | |
| 721 /* The log-normal distribution is a continuous probability distribution of a | |
| 722 * random variable whose logarithm is normally distributed. | |
| 723 * If Y is the general normal distribution, then X = exp(Y) is the general | |
| 724 * log-normal distribution. | |
| 725 * X will have different parameters from Y, | |
| 726 * so the mean of Y is called the "location" of X, | |
| 727 * and the standard deviation of Y is called the "shape" of X. | |
| 728 * The standard lognormal distribution exp(Z) has location = 0 and shape = 1. | |
| 729 * https://www.desmos.com/calculator/tqtbjm4s3z | |
| 730 */ | |
// Log-normal distribution: X = exp(Y) where Y is normal with
// mean = |opt_location| and standard deviation = |opt_shape| (default 1).
Statistics.LogNormalDistribution = function(opt_location, opt_shape) {
  this.normalDistribution_ = new Statistics.NormalDistribution(
      opt_location, Math.pow(opt_shape || 1, 2));
};

Statistics.LogNormalDistribution.prototype = {
  __proto__: Statistics.NormalDistribution.prototype,

  // Change of variables from the underlying normal: divide by x because
  // d(log x)/dx = 1/x.
  computeDensity: function(x) {
    return this.normalDistribution_.computeDensity(Math.log(x)) / x;
  },

  computePercentile: function(x) {
    return this.normalDistribution_.computePercentile(Math.log(x));
  },

  // mean = exp(location + shape^2 / 2)
  get mean() {
    const normal = this.normalDistribution_;
    return Math.exp(normal.mean + (normal.variance / 2));
  },

  // variance = exp(2*(location + shape^2)) - exp(2*location + shape^2)
  get variance() {
    const location = this.normalDistribution_.mean;
    const shapeSquared = this.normalDistribution_.variance;
    return Math.exp(2 * (location + shapeSquared)) -
        Math.exp(2 * location + shapeSquared);
  },

  get standardDeviation() {
    return Math.sqrt(this.variance);
  },

  // median = exp(location)
  get median() {
    return Math.exp(this.normalDistribution_.mean);
  },

  // mode = exp(location - shape^2)
  get mode() {
    const normal = this.normalDistribution_;
    return Math.exp(normal.mean - normal.variance);
  }
};
| 772 | |
| 773 /** | |
| 774 * Instead of describing a LogNormalDistribution in terms of its "location" | |
| 775 * and "shape", it can also be described in terms of its median | |
| 776 * and the point at which its complementary cumulative distribution | |
| 777 * function bends between the linear-ish region in the middle and the | |
| 778 * exponential-ish region. When the distribution is used to compute | |
| 779 * percentiles for log-normal random processes such as latency, as the latency | |
| 780 * improves, it hits a point of diminishing returns, when it becomes | |
| 781 * relatively difficult to improve the score further. This point of | |
| 782 * diminishing returns is the first x-intercept of the third derivative of the | |
| 783 * CDF, which is the second derivative of the PDF. | |
| 784 * | |
| 785 * https://www.desmos.com/calculator/cg5rnftabn | |
| 786 * | |
| 787 * @param {number} median The median of the distribution. | |
| 788 * @param {number} diminishingReturns The point of diminishing returns. | |
| 789 * @return {LogNormalDistribution} | |
| 790 */ | |
Statistics.LogNormalDistribution.fromMedianAndDiminishingReturns =
    function(median, diminishingReturns) {
      // Normalize the point of diminishing returns to a log-ratio relative
      // to the median, then solve for the shape parameter (see the desmos
      // link in the doc comment above for the derivation).
      const logRatio = Math.log(diminishingReturns / median);
      const shape = Math.sqrt(1 - 3 * logRatio -
          Math.sqrt(Math.pow(logRatio - 3, 2) - 8)) / 2;
      const location = Math.log(median);
      return new Statistics.LogNormalDistribution(location, shape);
    };
| 799 | |
// p-values less than this indicate statistical significance.
Statistics.DEFAULT_ALPHA = 0.01;

// If a statistically significant difference has not been established with
// this many observations per sample, we'll assume none exists.
Statistics.MAX_SUGGESTED_SAMPLE_SIZE = 20;

// Outcome of a hypothesis test; the string values use the hypothesis-testing
// vocabulary of rejecting (or failing to reject) the null hypothesis.
/** @enum */
Statistics.Significance = {
  SIGNIFICANT: 'REJECT',
  INSIGNIFICANT: 'FAIL_TO_REJECT',
  NEED_MORE_DATA: 'NEED_MORE_DATA',
  DONT_CARE: 'DONT_CARE',
};
| 814 | |
| 815 | |
| 816 /** | |
| 817 * @typedef {Object} HypothesisTestResult | |
| 818 * @property {number} p | |
| 819 * @property {number} U | |
| 820 * @property {!tr.b.Statistics.Significance} significance | |
| 821 */ | |
| 822 | |
| 823 /** | |
| 824 * @param {!Array.<number>} a | |
| 825 * @param {!Array.<number>} b | |
| 826 * @param {number=} opt_alpha | |
| 827 * @param {number=} opt_reqSampleSize | |
| 828 * @return {!HypothesisTestResult} | |
| 829 */ | |
Statistics.mwu = function(a, b, opt_alpha, opt_reqSampleSize) {
  const result = mannwhitneyu.test(a, b);
  const alpha = opt_alpha || Statistics.DEFAULT_ALPHA;
  if (result.p < alpha) {
    // The difference between the samples is statistically significant.
    result.significance = Statistics.Significance.SIGNIFICANT;
  } else if (opt_reqSampleSize && (a.length < opt_reqSampleSize ||
      b.length < opt_reqSampleSize)) {
    // Not significant yet, but the samples are too small to be sure.
    result.significance = Statistics.Significance.NEED_MORE_DATA;
  } else {
    result.significance = Statistics.Significance.INSIGNIFICANT;
  }
  return result;
};
| 843 | |
| 844 return { | |
| 845 Statistics, | |
| 846 }; | |
| 847 }); | |
| 848 </script> | |
| OLD | NEW |