tracing/tracing/base/statistics.html - Issue 2771723003: [tracing] Move math utilities from base into their own subdirectory (attempt 2)

Side by Side Diff: tracing/tracing/base/statistics.html

Issue 2771723003: [tracing] Move math utilities from base into their own subdirectory (attempt 2) (Closed)

Patch Set: rebase Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 <!DOCTYPE html>

2 <!--

3 Copyright 2014 The Chromium Authors. All rights reserved.

4 Use of this source code is governed by a BSD-style license that can be

5 found in the LICENSE file.

6 -->

7

8 <link rel="import" href="/tracing/base/math.html">

9 <link rel="import" href="/tracing/base/range.html">

10 <script src="/mannwhitneyu/mannwhitneyu.js"></script>

11

12 <script>

13 'use strict';

14

15 // In node, the script-src for mannwhitneyu above brings in mannwhitneyui

16 // into a module, instead of into the global scope. Whereas this file

17 // assumes that mannwhitneyu is in the global scope. So, in Node only, we

18 // require() it in, and then take all its exports and shove them into the

19 // global scope by hand.

20 (function(global) {

21 if (tr.isNode) {

22 var mwuAbsPath = HTMLImportsLoader.hrefToAbsolutePath(

23 '/mannwhitneyu.js');

24 var mwuModule = require(mwuAbsPath);

25 for (var exportName in mwuModule) {

26 global[exportName] = mwuModule[exportName];

27 }

28 }

29 })(this);

30 </script>

31

32 <script>

33 'use strict';

34

35 // TODO(charliea): Remove:

36 /* eslint-disable catapult-camelcase */

37

38 tr.exportTo('tr.b', function() {

39 var identity = x => x;

40

41 var Statistics = {};

42

43 /* Returns the quotient, or zero if the denominator is zero.*/

44 Statistics.divideIfPossibleOrZero = function(numerator, denominator) {

45 if (denominator === 0)

46 return 0;

47 return numerator / denominator;

48 };

49

50 Statistics.sum = function(ary, opt_func, opt_this) {

51 var func = opt_func \|\| identity;

52 var ret = 0;

53 var i = 0;

54 for (var elt of ary)

55 ret += func.call(opt_this, elt, i++);

56 return ret;

57 };

58

59 Statistics.mean = function(ary, opt_func, opt_this) {

60 var func = opt_func \|\| identity;

61 var sum = 0;

62 var i = 0;

63

64 for (var elt of ary)

65 sum += func.call(opt_this, elt, i++);

66

67 if (i === 0)

68 return undefined;

69

70 return sum / i;

71 };

72

73 Statistics.geometricMean = function(ary, opt_func, opt_this) {

74 var func = opt_func \|\| identity;

75 var i = 0;

76 var logsum = 0;

77

78 // The geometric mean is expressed as the arithmetic mean of logarithms

79 // in order to prevent overflow.

80 for (var elt of ary) {

81 var x = func.call(opt_this, elt, i++);

82 if (x <= 0)

83 return 0;

84 logsum += Math.log(Math.abs(x));

85 }

86

87 if (i === 0)

88 return 1;

89

90 return Math.exp(logsum / i);

91 };

92

93 // Returns undefined if the sum of the weights is zero.

94 Statistics.weightedMean = function(

95 ary, weightCallback, opt_valueCallback, opt_this) {

96 var valueCallback = opt_valueCallback \|\| identity;

97 var numerator = 0;

98 var denominator = 0;

99 var i = -1;

100

101 for (var elt of ary) {

102 i++;

103 var value = valueCallback.call(opt_this, elt, i);

104 if (value === undefined)

105 continue;

106 var weight = weightCallback.call(opt_this, elt, i, value);

107 numerator += weight * value;

108 denominator += weight;

109 }

110

111 if (denominator === 0)

112 return undefined;

113

114 return numerator / denominator;

115 };

116

117 Statistics.variance = function(ary, opt_func, opt_this) {

118 if (ary.length === 0)

119 return undefined;

120 if (ary.length === 1)

121 return 0;

122 var func = opt_func \|\| identity;

123 var mean = Statistics.mean(ary, func, opt_this);

124 var sumOfSquaredDistances = Statistics.sum(

125 ary,

126 function(d, i) {

127 var v = func.call(this, d, i) - mean;

128 return v * v;

129 },

130 opt_this);

131 return sumOfSquaredDistances / (ary.length - 1);

132 };

133

134 Statistics.stddev = function(ary, opt_func, opt_this) {

135 if (ary.length === 0)

136 return undefined;

137 return Math.sqrt(

138 Statistics.variance(ary, opt_func, opt_this));

139 };

140

141 Statistics.max = function(ary, opt_func, opt_this) {

142 var func = opt_func \|\| identity;

143 var ret = -Infinity;

144 var i = 0;

145 for (var elt of ary)

146 ret = Math.max(ret, func.call(opt_this, elt, i++));

147 return ret;

148 };

149

150 Statistics.min = function(ary, opt_func, opt_this) {

151 var func = opt_func \|\| identity;

152 var ret = Infinity;

153 var i = 0;

154 for (var elt of ary)

155 ret = Math.min(ret, func.call(opt_this, elt, i++));

156 return ret;

157 };

158

159 Statistics.range = function(ary, opt_func, opt_this) {

160 var func = opt_func \|\| identity;

161 var ret = new tr.b.Range();

162 var i = 0;

163 for (var elt of ary)

164 ret.addValue(func.call(opt_this, elt, i++));

165 return ret;

166 };

167

168 Statistics.percentile = function(ary, percent, opt_func, opt_this) {

169 if (!(percent >= 0 && percent <= 1))

170 throw new Error('percent must be [0,1]');

171

172 var func = opt_func \|\| identity;

173 var tmp = new Array(ary.length);

174 var i = 0;

175 for (var elt of ary)

176 tmp[i] = func.call(opt_this, elt, i++);

177 tmp.sort((a, b) => a - b);

178 var idx = Math.floor((ary.length - 1) * percent);

179 return tmp[idx];

180 };

181

182 /**

183 * Sorts the samples, and map them linearly to the range [0,1].

184 *

185 * They're mapped such that for the N samples, the first sample is 0.5/N and

186 * the last sample is (N-0.5)/N.

187 *

188 * Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is

189 * 2/N, twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In

190 * our case we don't want to distinguish between these two cases, as our

191 * original domain is not bounded (it is for Monte Carlo integration, where

192 * discrepancy was first used).

193 **/

194 Statistics.normalizeSamples = function(samples) {

195 if (samples.length === 0) {

196 return {

197 normalized_samples: samples,

198 scale: 1.0

199 };

200 }

201 // Create a copy to make sure that we don't mutate original \|samples\| input.

202 samples = samples.slice().sort(

203 function(a, b) {

204 return a - b;

205 }

206 );

207 var low = Math.min.apply(null, samples);

208 var high = Math.max.apply(null, samples);

209 var newLow = 0.5 / samples.length;

210 var newHigh = (samples.length - 0.5) / samples.length;

211 if (high - low === 0.0) {

212 // Samples is an array of 0.5 in this case.

213 samples = Array.apply(null, new Array(samples.length)).map(

214 function() { return 0.5;});

215 return {

216 normalized_samples: samples,

217 scale: 1.0

218 };

219 }

220 var scale = (newHigh - newLow) / (high - low);

221 for (var i = 0; i < samples.length; i++) {

222 samples[i] = (samples[i] - low) * scale + newLow;

223 }

224 return {

225 normalized_samples: samples,

226 scale: scale

227 };

228 };

229

230 /**

231 * Computes the discrepancy of a set of 1D samples from the interval [0,1].

232 *

233 * The samples must be sorted. We define the discrepancy of an empty set

234 * of samples to be zero.

235 *

236 * http://en.wikipedia.org/wiki/Low-discrepancy_sequence

237 * http://mathworld.wolfram.com/Discrepancy.html

238 */

239 Statistics.discrepancy = function(samples, opt_locationCount) {

240 if (samples.length === 0)

241 return 0.0;

242

243 var maxLocalDiscrepancy = 0;

244 var invSampleCount = 1.0 / samples.length;

245 var locations = [];

246 // For each location, stores the number of samples less than that location.

247 var countLess = [];

248 // For each location, stores the number of samples less than or equal to

249 // that location.

250 var countLessEqual = [];

251

252 if (opt_locationCount !== undefined) {

253 // Generate list of equally spaced locations.

254 var sampleIndex = 0;

255 for (var i = 0; i < opt_locationCount; i++) {

256 var location = i / (opt_locationCount - 1);

257 locations.push(location);

258 while (sampleIndex < samples.length &&

259 samples[sampleIndex] < location) {

260 sampleIndex += 1;

261 }

262 countLess.push(sampleIndex);

263 while (sampleIndex < samples.length &&

264 samples[sampleIndex] <= location) {

265 sampleIndex += 1;

266 }

267 countLessEqual.push(sampleIndex);

268 }

269 } else {

270 // Populate locations with sample positions. Append 0 and 1 if necessary.

271 if (samples[0] > 0.0) {

272 locations.push(0.0);

273 countLess.push(0);

274 countLessEqual.push(0);

275 }

276 for (var i = 0; i < samples.length; i++) {

277 locations.push(samples[i]);

278 countLess.push(i);

279 countLessEqual.push(i + 1);

280 }

281 if (samples[-1] < 1.0) {

282 locations.push(1.0);

283 countLess.push(samples.length);

284 countLessEqual.push(samples.length);

285 }

286 }

287

288 // Compute discrepancy as max(overshoot, -undershoot), where

289 // overshoot = max(countClosed(i, j)/N - length(i, j)) for all i < j,

290 // undershoot = min(countOpen(i, j)/N - length(i, j)) for all i < j,

291 // N = len(samples),

292 // countClosed(i, j) is the number of points between i and j

293 // including ends,

294 // countOpen(i, j) is the number of points between i and j excluding ends,

295 // length(i, j) is locations[i] - locations[j].

296

297 // The following algorithm is modification of Kadane's algorithm,

298 // see https://en.wikipedia.org/wiki/Maximum_subarray_problem.

299

300 // The maximum of (countClosed(k, i-1)/N - length(k, i-1)) for any k < i-1.

301 var maxDiff = 0;

302 // The minimum of (countOpen(k, i-1)/N - length(k, i-1)) for any k < i-1.

303 var minDiff = 0;

304 for (var i = 1; i < locations.length; i++) {

305 var length = locations[i] - locations[i - 1];

306 var countClosed = countLessEqual[i] - countLess[i - 1];

307 var countOpen = countLess[i] - countLessEqual[i - 1];

308 // Number of points that are added if we extend a closed range that

309 // ends at location (i-1).

310 var countClosedIncrement =

311 countLessEqual[i] - countLessEqual[i - 1];

312 // Number of points that are added if we extend an open range that

313 // ends at location (i-1).

314 var countOpenIncrement = countLess[i] - countLess[i - 1];

315

316 // Either extend the previous optimal range or start a new one.

317 maxDiff = Math.max(

318 countClosedIncrement * invSampleCount - length + maxDiff,

319 countClosed * invSampleCount - length);

320 minDiff = Math.min(

321 countOpenIncrement * invSampleCount - length + minDiff,

322 countOpen * invSampleCount - length);

323

324 maxLocalDiscrepancy = Math.max(

325 maxDiff, -minDiff, maxLocalDiscrepancy);

326 }

327 return maxLocalDiscrepancy;

328 };

329

330 /**

331 * A discrepancy based metric for measuring timestamp jank.

332 *

333 * timestampsDiscrepancy quantifies the largest area of jank observed in a

334 * series of timestamps. Note that this is different from metrics based on

335 * the max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6]

336 * and B = [0,1,2,3,5,7] have the same max_time_interval = 2, but

337 * Discrepancy(B) > Discrepancy(A).

338 *

339 * Two variants of discrepancy can be computed:

340 *

341 * Relative discrepancy is following the original definition of

342 * discrepancy. It characterized the largest area of jank, relative to the

343 * duration of the entire time stamp series. We normalize the raw results,

344 * because the best case discrepancy for a set of N samples is 1/N (for

345 * equally spaced samples), and we want our metric to report 0.0 in that

346 * case.

347 *

348 * Absolute discrepancy also characterizes the largest area of jank, but its

349 * value wouldn't change (except for imprecisions due to a low

350 * \|interval_multiplier\|) if additional 'good' intervals were added to an

351 * exisiting list of time stamps. Its range is [0,inf] and the unit is

352 * milliseconds.

353 *

354 * The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same

355 * absolute discrepancy, but D has lower relative discrepancy than C.

356 *

357 * \|timestamps\| may be a list of lists S = [S_1, S_2, ..., S_N], where each

358 * S_i is a time stamp series. In that case, the discrepancy D(S) is:

359 * D(S) = max(D(S_1), D(S_2), ..., D(S_N))

360 **/

361 Statistics.timestampsDiscrepancy = function(timestamps, opt_absolute,

362 opt_locationCount) {

363 if (timestamps.length === 0)

364 return 0.0;

365

366 if (opt_absolute === undefined)

367 opt_absolute = true;

368

369 if (Array.isArray(timestamps[0])) {

370 var rangeDiscrepancies = timestamps.map(function(r) {

371 return Statistics.timestampsDiscrepancy(r);

372 });

373 return Math.max.apply(null, rangeDiscrepancies);

374 }

375

376 var s = Statistics.normalizeSamples(timestamps);

377 var samples = s.normalized_samples;

378 var sampleScale = s.scale;

379 var discrepancy = Statistics.discrepancy(samples, opt_locationCount);

380 var invSampleCount = 1.0 / samples.length;

381 if (opt_absolute === true) {

382 // Compute absolute discrepancy

383 discrepancy /= sampleScale;

384 } else {

385 // Compute relative discrepancy

386 discrepancy = tr.b.clamp(

387 (discrepancy - invSampleCount) / (1.0 - invSampleCount), 0.0, 1.0);

388 }

389 return discrepancy;

390 };

391

392 /**

393 * A discrepancy based metric for measuring duration jank.

394 *

395 * DurationsDiscrepancy computes a jank metric which measures how irregular a

396 * given sequence of intervals is. In order to minimize jank, each duration

397 * should be equally long. This is similar to how timestamp jank works,

398 * and we therefore reuse the timestamp discrepancy function above to compute

399 * a similar duration discrepancy number.

400 *

401 * Because timestamp discrepancy is defined in terms of timestamps, we first

402 * convert the list of durations to monotonically increasing timestamps.

403 *

404 * Args:

405 * durations: List of interval lengths in milliseconds.

406 * absolute: See TimestampsDiscrepancy.

407 * opt_locationCount: See TimestampsDiscrepancy.

408 **/

409 Statistics.durationsDiscrepancy = function(

410 durations, opt_absolute, opt_locationCount) {

411 if (durations.length === 0)

412 return 0.0;

413

414 var timestamps = durations.reduce(function(prev, curr, index, array) {

415 prev.push(prev[prev.length - 1] + curr);

416 return prev;

417 }, [0]);

418 return Statistics.timestampsDiscrepancy(

419 timestamps, opt_absolute, opt_locationCount);

420 };

421

422 /**

423 * Modifies \|samples\| in-place to reduce its length down to \|count\|.

424 *

425 * @param {!Array} samples

426 * @param {number} count

427 * @return {!Array}

428 */

429 Statistics.uniformlySampleArray = function(samples, count) {

430 if (samples.length <= count) {

431 return samples;

432 }

433 while (samples.length > count) {

434 var i = parseInt(Math.random() * samples.length);

435 samples.splice(i, 1);

436 }

437 return samples;

438 };

439

440 /**

441 * A mechanism to uniformly sample elements from an arbitrary long stream.

442 *

443 * Call this method every time a new element is obtained from the stream,

444 * passing always the same \|samples\| array and the \|numSamples\| you desire.

445 * Also pass in the current \|streamLength\|, which is the same as the index of

446 * \|newElement\| within that stream.

447 *

448 * The \|samples\| array will possibly be updated, replacing one of its element

449 * with \|newElements\|. The length of \|samples\| will not be more than

450 * \|numSamples\|.

451 *

452 * This method guarantees that after \|streamLength\| elements have been

453 * processed each one has equal probability of being in \|samples\|. The order

454 * of samples is not preserved though.

455 *

456 * Args:

457 * samples: Array of elements that have already been selected. Start with [].

458 * streamLength: The current length of the stream, up to \|newElement\|.

459 * newElement: The element that was just extracted from the stream.

460 * numSamples: The total number of samples desired.

461 **/

462 Statistics.uniformlySampleStream = function(samples, streamLength, newElement,

463 numSamples) {

464 if (streamLength <= numSamples) {

465 if (samples.length >= streamLength)

466 samples[streamLength - 1] = newElement;

467 else

468 samples.push(newElement);

469 return;

470 }

471

472 var probToKeep = numSamples / streamLength;

473 if (Math.random() > probToKeep)

474 return; // New sample was rejected.

475

476 // Keeping it, replace an alement randomly.

477 var index = Math.floor(Math.random() * numSamples);

478 samples[index] = newElement;

479 };

480

481 /**

482 * A mechanism to merge two arrays of uniformly sampled elements in a way that

483 * ensures elements in the final array are still sampled uniformly.

484 *

485 * This works similarly to sampleStreamUniform. The \|samplesA\| array will be

486 * updated, some of its elements replaced by elements from \|samplesB\| in a

487 * way that ensure that elements will be sampled uniformly.

488 *

489 * Args:

490 * samplesA: Array of uniformly sampled elements, will be updated.

491 * streamLengthA: The length of the stream from which \|samplesA\| was sampled.

492 * samplesB: Other array of uniformly sampled elements, will NOT be updated.

493 * streamLengthB: The length of the stream from which \|samplesB\| was sampled.

494 * numSamples: The total number of samples desired, both in \|samplesA\| and

495 * \|samplesB\|.

496 **/

497 Statistics.mergeSampledStreams = function(

498 samplesA, streamLengthA,

499 samplesB, streamLengthB, numSamples) {

500 if (streamLengthB < numSamples) {

501 // samplesB has not reached max capacity so every sample of stream B were

502 // chosen with certainty. Add them one by one into samplesA.

503 var nbElements = Math.min(streamLengthB, samplesB.length);

504 for (var i = 0; i < nbElements; ++i) {

505 Statistics.uniformlySampleStream(samplesA, streamLengthA + i + 1,

506 samplesB[i], numSamples);

507 }

508 return;

509 }

510 if (streamLengthA < numSamples) {

511 // samplesA has not reached max capacity so every sample of stream A were

512 // chosen with certainty. Add them one by one into samplesB.

513 var nbElements = Math.min(streamLengthA, samplesA.length);

514 var tempSamples = samplesB.slice();

515 for (var i = 0; i < nbElements; ++i) {

516 Statistics.uniformlySampleStream(tempSamples, streamLengthB + i + 1,

517 samplesA[i], numSamples);

518 }

519 // Copy that back into the first vector.

520 for (var i = 0; i < tempSamples.length; ++i) {

521 samplesA[i] = tempSamples[i];

522 }

523 return;

524 }

525

526 // Both sample arrays are at max capacity, use the power of maths!

527 // Elements in samplesA have been selected with probability

528 // numSamples / streamLengthA. Same for samplesB. For each index of the

529 // array we keep samplesA[i] with probability

530 // P = streamLengthA / (streamLengthA + streamLengthB)

531 // and replace it with samplesB[i] with probability 1-P.

532 // The total probability of keeping it is therefore

533 // numSamples / streamLengthA *

534 // streamLengthA / (streamLengthA + streamLengthB)

535 // = numSamples / (streamLengthA + streamLengthB)

536 // A similar computation shows we have the same probability of keeping any

537 // element in samplesB. Magic!

538 var nbElements = Math.min(numSamples, samplesB.length);

539 var probOfSwapping = streamLengthB / (streamLengthA + streamLengthB);

540 for (var i = 0; i < nbElements; ++i) {

541 if (Math.random() < probOfSwapping) {

542 samplesA[i] = samplesB[i];

543 }

544 }

545 };

546

547 /* Continuous distributions are defined by probability density functions.

548 *

549 * Random variables are referred to by capital letters: X, Y, Z.

550 * Particular values from these distributions are referred to by lowercase

551 * letters like \|x\|.

552 * The probability that \|X\| ever exactly equals \|x\| is P(X==x) = 0.

553 *

554 * For a discrete probability distribution, see tr.v.Histogram.

555 */

556 function Distribution() {

557 }

558

559 Distribution.prototype = {

560 /* The probability density of the random variable at value \|x\| is the

561 * relative likelihood for this random variable to take on the given value

562 * \|x\|.

563 *

564 * @param {number} x A value from the random distribution.

565 * @return {number} probability density at x.

566 */

567 computeDensity: function(x) {

568 throw Error('Not implemented');

569 },

570

571 /* A percentile is the probability that a sample from the distribution is

572 * less than the given value \|x\|. This function is monotonically increasing.

573 *

574 * @param {number} x A value from the random distribution.

575 * @return {number} P(X<x).

576 */

577 computePercentile: function(x) {

578 throw Error('Not implemented');

579 },

580

581 /* A complementary percentile is the probability that a sample from the

582 * distribution is greater than the given value \|x\|. This function is

583 * monotonically decreasing.

584 *

585 * @param {number} x A value from the random distribution.

586 * @return {number} P(X>x).

587 */

588 computeComplementaryPercentile: function(x) {

589 return 1 - this.computePercentile(x);

590 },

591

592 /* Compute the mean of the probability distribution.

593 *

594 * @return {number} mean.

595 */

596 get mean() {

597 throw Error('Not implemented');

598 },

599

600 /* The mode of a distribution is the most likely value.

601 * The maximum of the computeDensity() function is at this mode.

602 * @return {number} mode.

603 */

604 get mode() {

605 throw Error('Not implemented');

606 },

607

608 /* The median is the center value of the distribution.

609 * computePercentile(median) = computeComplementaryPercentile(median) = 0.5

610 *

611 * @return {number} median.

612 */

613 get median() {

614 throw Error('Not implemented');

615 },

616

617 /* The standard deviation is a measure of how dispersed or spread out the

618 * distribution is (this statistic has the same units as the values).

619 *

620 * @return {number} standard deviation.

621 */

622 get standardDeviation() {

623 throw Error('Not implemented');

624 },

625

626 /* An alternative measure of how spread out the distribution is,

627 * the variance is the square of the standard deviation.

628 * @return {number} variance.

629 */

630 get variance() {

631 throw Error('Not implemented');

632 }

633 };

634

635 Statistics.UniformDistribution = function(opt_range) {

636 if (!opt_range)

637 opt_range = tr.b.Range.fromExplicitRange(0, 1);

638 this.range = opt_range;

639 };

640

641 Statistics.UniformDistribution.prototype = {

642 __proto__: Distribution.prototype,

643

644 computeDensity: function(x) {

645 return 1 / this.range.range;

646 },

647

648 computePercentile: function(x) {

649 return tr.b.normalize(x, this.range.min, this.range.max);

650 },

651

652 get mean() {

653 return this.range.center;

654 },

655

656 get mode() {

657 return undefined;

658 },

659

660 get median() {

661 return this.mean;

662 },

663

664 get standardDeviation() {

665 return Math.sqrt(this.variance);

666 },

667

668 get variance() {

669 return Math.pow(this.range.range, 2) / 12;

670 }

671 };

672

673 /* The Normal or Gaussian distribution, or bell curve, is common in complex

674 * processes such as are found in many of the natural sciences. If Z is the

675 * standard normal distribution with mean = 0 and variance = 1, then the

676 * general normal distribution is Y = mean + Z*sqrt(variance).

677 * https://www.desmos.com/calculator/tqtbjm4s3z

678 */

679 Statistics.NormalDistribution = function(opt_mean, opt_variance) {

680 this.mean_ = opt_mean \|\| 0;

681 this.variance_ = opt_variance \|\| 1;

682 this.standardDeviation_ = Math.sqrt(this.variance_);

683 };

684

685 Statistics.NormalDistribution.prototype = {

686 __proto__: Distribution.prototype,

687

688 computeDensity: function(x) {

689 var scale = (1.0 / (this.standardDeviation * Math.sqrt(2.0 * Math.PI)));

690 var exponent = -Math.pow(x - this.mean, 2) / (2.0 * this.variance);

691 return scale * Math.exp(exponent);

692 },

693

694 computePercentile: function(x) {

695 var standardizedX = ((x - this.mean) /

696 Math.sqrt(2.0 * this.variance));

697 return (1.0 + tr.b.erf(standardizedX)) / 2.0;

698 },

699

700 get mean() {

701 return this.mean_;

702 },

703

704 get median() {

705 return this.mean;

706 },

707

708 get mode() {

709 return this.mean;

710 },

711

712 get standardDeviation() {

713 return this.standardDeviation_;

714 },

715

716 get variance() {

717 return this.variance_;

718 }

719 };

720

721 /* The log-normal distribution is a continuous probability distribution of a

722 * random variable whose logarithm is normally distributed.

723 * If Y is the general normal distribution, then X = exp(Y) is the general

724 * log-normal distribution.

725 * X will have different parameters from Y,

726 * so the mean of Y is called the "location" of X,

727 * and the standard deviation of Y is called the "shape" of X.

728 * The standard lognormal distribution exp(Z) has location = 0 and shape = 1.

729 * https://www.desmos.com/calculator/tqtbjm4s3z

730 */

731 Statistics.LogNormalDistribution = function(opt_location, opt_shape) {

732 this.normalDistribution_ = new Statistics.NormalDistribution(

733 opt_location, Math.pow(opt_shape \|\| 1, 2));

734 };

735

736 Statistics.LogNormalDistribution.prototype = {

737 __proto__: Statistics.NormalDistribution.prototype,

738

739 computeDensity: function(x) {

740 return this.normalDistribution_.computeDensity(Math.log(x)) / x;

741 },

742

743 computePercentile: function(x) {

744 return this.normalDistribution_.computePercentile(Math.log(x));

745 },

746

747 get mean() {

748 return Math.exp(this.normalDistribution_.mean +

749 (this.normalDistribution_.variance / 2));

750 },

751

752 get variance() {

753 var nm = this.normalDistribution_.mean;

754 var nv = this.normalDistribution_.variance;

755 return (Math.exp(2 * (nm + nv)) -

756 Math.exp(2 * nm + nv));

757 },

758

759 get standardDeviation() {

760 return Math.sqrt(this.variance);

761 },

762

763 get median() {

764 return Math.exp(this.normalDistribution_.mean);

765 },

766

767 get mode() {

768 return Math.exp(this.normalDistribution_.mean -

769 this.normalDistribution_.variance);

770 }

771 };

772

773 /**

774 * Instead of describing a LogNormalDistribution in terms of its "location"

775 * and "shape", it can also be described in terms of its median

776 * and the point at which its complementary cumulative distribution

777 * function bends between the linear-ish region in the middle and the

778 * exponential-ish region. When the distribution is used to compute

779 * percentiles for log-normal random processes such as latency, as the latency

780 * improves, it hits a point of diminishing returns, when it becomes

781 * relatively difficult to improve the score further. This point of

782 * diminishing returns is the first x-intercept of the third derivative of the

783 * CDF, which is the second derivative of the PDF.

784 *

785 * https://www.desmos.com/calculator/cg5rnftabn

786 *

787 * @param {number} median The median of the distribution.

788 * @param {number} diminishingReturns The point of diminishing returns.

789 * @return {LogNormalDistribution}

790 */

791 Statistics.LogNormalDistribution.fromMedianAndDiminishingReturns =

792 function(median, diminishingReturns) {

793 diminishingReturns = Math.log(diminishingReturns / median);

794 var shape = Math.sqrt(1 - 3 * diminishingReturns -

795 Math.sqrt(Math.pow(diminishingReturns - 3, 2) - 8)) / 2;

796 var location = Math.log(median);

797 return new Statistics.LogNormalDistribution(location, shape);

798 };

799

800 // p-values less than this indicate statistical significance.

801 Statistics.DEFAULT_ALPHA = 0.01;

802

803 // If a statistical significant difference has not been established with

804 // this many observations per sample, we'll assume none exists.

805 Statistics.MAX_SUGGESTED_SAMPLE_SIZE = 20;

806

807 /** @enum */

808 Statistics.Significance = {

809 SIGNIFICANT: 'REJECT',

810 INSIGNIFICANT: 'FAIL_TO_REJECT',

811 NEED_MORE_DATA: 'NEED_MORE_DATA',

812 DONT_CARE: 'DONT_CARE',

813 };

814

815

816 /**

817 * @typedef {Object} HypothesisTestResult

818 * @property {number} p

819 * @property {number} U

820 * @property {!tr.b.Statistics.Significance} significance

821 */

822

823 /**

824 * @param {!Array.<number>} a

825 * @param {!Array.<number>} b

826 * @param {number=} opt_alpha

827 * @param {number=} opt_reqSampleSize

828 * @return {!HypothesisTestResult}

829 */

830 Statistics.mwu = function(a, b, opt_alpha, opt_reqSampleSize) {

831 var result = mannwhitneyu.test(a, b);

832 var alpha = opt_alpha \|\| Statistics.DEFAULT_ALPHA;

833 if (result.p < alpha) {

834 result.significance = Statistics.Significance.SIGNIFICANT;

835 } else if (opt_reqSampleSize && (a.length < opt_reqSampleSize \|\|

836 b.length < opt_reqSampleSize)) {

837 result.significance = Statistics.Significance.NEED_MORE_DATA;

838 } else {

839 result.significance = Statistics.Significance.INSIGNIFICANT;

840 }

841 return result;

842 };

843

844 return {

845 Statistics,

846 };

847 });

848 </script>

OLD	NEW

« no previous file with comments | « tracing/tracing/base/sorted_array_utils_test.html ('k') | tracing/tracing/base/statistics_test.html » ('j') | no next file with comments »