Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(394)

Unified Diff: dashboard/dashboard/pinpoint/mann_whitney_u.py

Issue 3019503002: [pinpoint] Move mann_whitney_u.py to models/ (Closed)
Patch Set: Created 3 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | dashboard/dashboard/pinpoint/mann_whitney_u_test.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: dashboard/dashboard/pinpoint/mann_whitney_u.py
diff --git a/dashboard/dashboard/pinpoint/mann_whitney_u.py b/dashboard/dashboard/pinpoint/mann_whitney_u.py
deleted file mode 100644
index 770f5aadd4370e91552fb9d2e2dce2be5e11566f..0000000000000000000000000000000000000000
--- a/dashboard/dashboard/pinpoint/mann_whitney_u.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Pure Python implementation of the Mann-Whitney U test.
-
-This code is adapted from SciPy:
- https://github.com/scipy/scipy/blob/master/scipy/stats/stats.py
-Which is provided under a BSD-style license.
-
-There is also a JavaScript version in Catapult:
- https://github.com/catapult-project/catapult/blob/master/tracing/third_party/mannwhitneyu/mannwhitneyu.js
-"""
-
-import itertools
-import math
-
-
def MannWhitneyU(x, y):
  """Computes the Mann-Whitney rank test on samples x and y.

  The distribution of U is approximately normal for large samples. This
  implementation uses the normal approximation, so it's recommended to have
  sample sizes > 20.

  Args:
    x: An iterable of numbers, the first sample.
    y: An iterable of numbers, the second sample.

  Returns:
    The two-sided p-value, computed from the normal approximation with a tie
    correction and a 0.5 continuity correction.

  Raises:
    ValueError: If either sample is empty, or if every value in the pooled
        samples is identical.
  """
  # Copy into lists so that `x + y` below always concatenates, whatever
  # sequence or iterable types the caller passed in.
  x = list(x)
  y = list(y)
  if not x or not y:
    # With an empty sample the standard deviation below is 0 and the z-score
    # computation would fail with an unhelpful ZeroDivisionError.
    raise ValueError('Both samples must be non-empty in mannwhitneyu')

  n1 = len(x)
  n2 = len(y)
  ranked = _RankData(x + y)
  rank_x = ranked[:n1]  # The pooled ranks that belong to sample x.
  u1 = n1*n2 + n1*(n1+1)/2.0 - sum(rank_x)  # U statistic for x.
  u2 = n1*n2 - u1  # The remainder is the U statistic for y.

  t = _TieCorrectionFactor(ranked)
  if t == 0:
    # The factor is 0 only when all pooled values fall into one tie group.
    raise ValueError('All numbers are identical in mannwhitneyu')
  sd = math.sqrt(t * n1 * n2 * (n1+n2+1) / 12.0)

  # Mean of U under the null hypothesis, shifted by the continuity correction.
  mean_rank = n1*n2/2.0 + 0.5
  big_u = max(u1, u2)

  # NOTE(review): when u1 == u2 the numerator is -0.5, so abs() below yields a
  # p-value slightly under 1 rather than exactly 1 -- confirm this is intended.
  z = (big_u - mean_rank) / sd
  return 2 * _NormSf(abs(z))
-
-
def _RankData(a):
  """Assigns ranks to data. Ties are given the mean of the ranks of the items.

  This is called "fractional ranking":
    https://en.wikipedia.org/wiki/Ranking

  Args:
    a: A sequence of mutually comparable values.

  Returns:
    A list of floats, parallel to `a`, holding the one-based rank of each
    item. Items that compare equal all receive the mean of the ranks they
    jointly occupy.
  """
  # One ascending sort of the indices is enough: equal values end up adjacent,
  # so each tie group is a contiguous run of positions.
  order = sorted(range(len(a)), key=a.__getitem__)
  ranks = [0.0] * len(order)

  first = 0  # Zero-based sorted position where the current tie run starts.
  for _, run in itertools.groupby(order, key=a.__getitem__):
    tied = list(run)
    last = first + len(tied) - 1
    # Mean of the one-based ranks first+1 .. last+1.
    mean_rank = 1 + (first + last) / 2.0
    for index in tied:
      ranks[index] = mean_rank
    first = last + 1

  return ranks
-
-
def _ArgSort(a):
  """Returns the indices that would sort `a` in ascending order.

  Items that compare equal keep their original relative order, so tied
  indices come out in ordinal order.
  """
  # sorted() is stable, so pairing each value with its index and sorting on
  # the value alone leaves tied indices in their original sequence.
  indexed = sorted(enumerate(a), key=lambda pair: pair[1])
  return [index for index, _ in indexed]
-
-
def _ArgSortReverse(a):
  """Returns the indices that would sort `a` in ascending order.

  Unlike _ArgSort, items that compare equal come out in reverse ordinal
  order (the later index first).
  """
  # A stable descending sort keeps tied indices in ordinal order; flipping the
  # whole result gives ascending values with each tie group reversed.
  descending = sorted(enumerate(a), key=lambda pair: pair[1], reverse=True)
  return [index for index, _ in reversed(descending)]
-
-
def _TieCorrectionFactor(rankvals):
  """Tie correction factor for ties in the Mann-Whitney U test.

  Args:
    rankvals: An iterable of rank values.

  Returns:
    1.0 when there are fewer than two values. Otherwise
    1 - sum(c**3 - c) / (n**3 - n), where each c is the size of one group of
    equal values and n is the total number of values. The result is 1.0 with
    no ties and 0.0 when every value is equal.
  """
  ordered = sorted(rankvals)
  total = len(ordered)
  # Sorting puts equal values next to each other, so each groupby run is one
  # tie group; only the group sizes are needed.
  group_sizes = [sum(1 for _ in run) for _, run in itertools.groupby(ordered)]
  if total < 2:
    return 1.0
  tie_term = sum(count**3 - count for count in group_sizes)
  return 1.0 - tie_term / float(total**3 - total)
-
-
def _NormSf(x):
  """Survival function of the standard normal distribution. (1 - cdf)

  Args:
    x: A number, the point at which to evaluate the survival function.

  Returns:
    The probability that a standard normal variable exceeds x, as a float.
  """
  # erfc(v) equals 1 - erf(v) but is computed directly. Subtracting erf from 1
  # cancels to exactly 0.0 far in the upper tail, losing all precision.
  return math.erfc(x / math.sqrt(2)) / 2
« no previous file with comments | « no previous file | dashboard/dashboard/pinpoint/mann_whitney_u_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698