dashboard/dashboard/pinpoint/mann_whitney_u.py - Issue 3019503002: [pinpoint] Move mann_whitney_u.py to models/

Unified Diff: dashboard/dashboard/pinpoint/mann_whitney_u.py

Issue 3019503002: [pinpoint] Move mann_whitney_u.py to models/ (Closed)

Patch Set: Created 3 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: dashboard/dashboard/pinpoint/mann_whitney_u.py

diff --git a/dashboard/dashboard/pinpoint/mann_whitney_u.py b/dashboard/dashboard/pinpoint/mann_whitney_u.py

deleted file mode 100644

index 770f5aadd4370e91552fb9d2e2dce2be5e11566f..0000000000000000000000000000000000000000

--- a/dashboard/dashboard/pinpoint/mann_whitney_u.py

+++ /dev/null

@@ -1,90 +0,0 @@

-# Use of this source code is governed by a BSD-style license that can be

-# found in the LICENSE file.

-"""Pure Python implementation of the Mann-Whitney U test.

-This code is adapted from SciPy:

- https://github.com/scipy/scipy/blob/master/scipy/stats/stats.py

-Which is provided under a BSD-style license.

-There is also a JavaScript version in Catapult:

- https://github.com/catapult-project/catapult/blob/master/tracing/third_party/mannwhitneyu/mannwhitneyu.js

-"""

-import itertools

-import math

def MannWhitneyU(x, y):
  """Computes the two-sided Mann-Whitney rank test on samples x and y.

  The distribution of U is approximately normal for large samples. This
  implementation uses the normal approximation, so it's recommended to have
  sample sizes > 20.

  Args:
    x: A sequence of numbers, the first sample.
    y: A sequence of numbers, the second sample.

  Returns:
    The two-sided p-value, as a float.

  Raises:
    ValueError: If either sample is empty, or if every value across both
        samples is identical (the test is undefined in both cases).
  """
  n1 = len(x)
  n2 = len(y)
  if n1 == 0 or n2 == 0:
    # With an empty sample the standard deviation below is zero, which would
    # otherwise surface as an unhelpful ZeroDivisionError.
    raise ValueError('Both samples must be non-empty in mannwhitneyu')

  # Rank the pooled data; the first n1 ranks belong to x. Copying into lists
  # lets callers pass any mix of sequence types (e.g. a list and a tuple).
  ranked = _RankData(list(x) + list(y))
  rankx = ranked[0:n1]
  u1 = n1*n2 + n1*(n1+1)/2.0 - sum(rankx)  # U statistic for x.
  u2 = n1*n2 - u1  # The remainder is the U statistic for y.

  t = _TieCorrectionFactor(ranked)
  if t == 0:
    raise ValueError('All numbers are identical in mannwhitneyu')
  sd = math.sqrt(t * n1 * n2 * (n1+n2+1) / 12.0)

  # The extra 0.5 is the continuity correction used by the SciPy
  # implementation this code was adapted from.
  mean_rank = n1*n2/2.0 + 0.5
  big_u = max(u1, u2)

  z = (big_u - mean_rank) / sd
  return 2 * _NormSf(abs(z))

-def _RankData(a):

- """Assigns ranks to data. Ties are given the mean of the ranks of the items.

- This is called "fractional ranking":

- https://en.wikipedia.org/wiki/Ranking

- """

- sorter = _ArgSortReverse(a)

- ranked_min = [0] * len(sorter)

- for i, j in reversed(list(enumerate(sorter))):

- ranked_min[j] = i

- sorter = _ArgSort(a)

- ranked_max = [0] * len(sorter)

- for i, j in enumerate(sorter):

- ranked_max[j] = i

- return [1 + (x+y)/2.0 for x, y in zip(ranked_min, ranked_max)]

-def _ArgSort(a):

- """Returns the indices that would sort an array.

- Ties are given indices in ordinal order."""

- return sorted(range(len(a)), key=a.__getitem__)

-def _ArgSortReverse(a):

- """Returns the indices that would sort an array.

- Ties are given indices in reverse ordinal order."""

- return list(reversed(sorted(range(len(a)), key=a.__getitem__, reverse=True)))

-def _TieCorrectionFactor(rankvals):

- """Tie correction factor for ties in the Mann-Whitney U test."""

- arr = sorted(rankvals)

- cnt = [len(list(group)) for _, group in itertools.groupby(arr)]

- size = len(arr)

- if size < 2:

- return 1.0

- else:

- return 1.0 - sum(x**3 - x for x in cnt) / float(size**3 - size)

-def _NormSf(x):

- """Survival function of the standard normal distribution. (1 - cdf)"""

- return (1 - math.erf(x/math.sqrt(2))) / 2

« no previous file with comments | « no previous file | dashboard/dashboard/pinpoint/mann_whitney_u_test.py » ('j') | no next file with comments »