scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py - Issue 1610203003: Iteratively increase sample size for good/bad classification.

Unified Diff: scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py

Issue 1610203003: Iteratively increase sample size for good/bad classification. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master

Patch Set: Fixing multiple problems Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py ('K') | « scripts/slave/recipe_modules/auto_bisect/perf_revision_state.py ('k') | scripts/slave/recipe_modules/auto_bisect/resources/significantly_different_test.py » ('j') | scripts/slave/recipe_modules/auto_bisect/resources/significantly_different_test.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py

diff --git a/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py b/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py

new file mode 100644

index 0000000000000000000000000000000000000000..c1c9580b099f89a1066cd9a0e698431cbf8b3dcc

--- /dev/null

+++ b/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py

@@ -0,0 +1,62 @@

+"""Launches an anaconda environment to run some scipy hypothesis tests."""

+import json

+import os

+import subprocess

+import sys

# Source of the helper program that main() pipes to the stdin of the
# scientific Python interpreter. The helper takes two JSON-encoded lists of
# numbers as command-line arguments and prints a single JSON object holding
# both samples and the p-values of several scipy hypothesis tests.
CONDA_SCRIPT = """
import json
import sys
from scipy import stats

_, list_a, list_b = sys.argv
list_a = json.loads(list_a)
list_b = json.loads(list_b)

# One Shapiro-Wilk p-value per sample, in (first, second) order.
shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1]
mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue
anderson_p_value = stats.anderson_ksamp([list_a, list_b]).significance_level
welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1]

results = {
    'first_sample': list_a,
    'second_sample': list_b,
    'shapiro_p_value': shapiro_p_value,
    'mann_p_value': mann_whitney_p_value,
    # Report the Anderson-Darling result itself, not a copy of the
    # Mann-Whitney p-value.
    'anderson_p_value': anderson_p_value,
    'welch_p_value': welch_p_value,
}
# Single-argument print() parses under both Python 2 and Python 3.
print(json.dumps(results))
sys.exit(0)
"""

def main(argv, anaconda_path=None):
    """Run the scipy hypothesis tests on two samples and print the verdict.

    CONDA_SCRIPT is piped to the stdin of a separate Python interpreter,
    which receives the two samples as command-line arguments. The JSON
    object it prints is extended with the 'normal-y' and
    'significantly_different' flags and printed to stdout as indented JSON.

    Args:
        argv: Exactly four items: the program name (ignored), the first
            sample and the second sample (each a JSON-encoded list, passed
            through unchanged to the helper), and the significance level.
        anaconda_path: Path of the Python interpreter that runs the helper.
            When falsy, a platform-specific default location is used.

    Raises:
        ValueError: If argv does not hold exactly four items, or the
            significance level cannot be converted to a float.
        subprocess.CalledProcessError: If the helper exits with a non-zero
            status.
    """
    if not anaconda_path:
        if os.name == 'nt':
            anaconda_path = r'c:\conda-py-scientific\bin\python'
        else:
            anaconda_path = '/opt/conda-py-scientific/bin/python'

    _, list_a, list_b, significance = argv
    # argv items are strings; convert once so every comparison below is
    # numeric rather than a float-versus-str comparison.
    threshold = float(significance)

    # Review thread preserved from the code-review page (both comments are
    # truncated in the page itself):
    #   qyearsley 2016/01/26 19:26:00: Why is it necessary to do this
    #     through anaconda? O
    #   RobertoCN 2016/02/01 17:29:51: Added the explanation to the
    #     docstring of bisector
    command = [anaconda_path, '-', list_a, list_b]
    p = subprocess.Popen(command, stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE)
    # The script is encoded because the pipe is binary (the output is decoded
    # below); it is plain ASCII, so this is safe on Python 2 and Python 3.
    output, _ = p.communicate(input=CONDA_SCRIPT.encode('utf-8'))
    if p.returncode:
        # Fail with the helper's exit status instead of an opaque JSON
        # decoding error on empty or partial output.
        raise subprocess.CalledProcessError(p.returncode, command, output)

    results = json.loads(output.decode())

    # NOTE(review): 'normal-y' is set when BOTH Shapiro-Wilk p-values fall
    # below the threshold. A small Shapiro-Wilk p-value is evidence against
    # normality, so this direction looks inverted -- confirm the intended
    # meaning with consumers of this key before changing it.
    first_p, second_p = results['shapiro_p_value']
    results['normal-y'] = bool(first_p < threshold and second_p < threshold)

    results['significantly_different'] = bool(
        float(results['mann_p_value']) < threshold)
    print(json.dumps(results, indent=4))

if __name__ == '__main__':
    # Script entry point: hand the raw command line to main() and use
    # whatever it returns as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)