Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(90)

Unified Diff: scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py

Issue 1610203003: Iteratively increase sample size for good/bad classification. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Fixing multiple problems Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py
diff --git a/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py b/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1c9580b099f89a1066cd9a0e698431cbf8b3dcc
--- /dev/null
+++ b/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py
@@ -0,0 +1,62 @@
+"""Launches an anaconda environment to run some scipy hypothesis tests."""
+
+import json
+import os
+import subprocess
+import sys
+
+CONDA_SCRIPT = """
+import json
+import sys
+
+from scipy import stats
+
+_, list_a, list_b = sys.argv
+list_a = json.loads(list_a)
+list_b = json.loads(list_b)
+
+shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1]
+mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue
+anderson_p_value = stats.anderson_ksamp([list_a, list_b]).significance_level
+welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1]
+
+results = {
+ 'first_sample': list_a,
+ 'second_sample': list_b,
+ 'shapiro_p_value': shapiro_p_value,
+ 'mann_p_value': mann_whitney_p_value,
+ 'anderson_p_value': mann_whitney_p_value,
+ 'welch_p_value': welch_p_value,
+}
+
+print json.dumps(results)
+sys.exit(0)
+"""
+
+def main(argv, anaconda_path=None):
+ if not anaconda_path:
+ if os.name == 'nt':
+ anaconda_path = r'c:\conda-py-scientific\bin\python'
+ else:
+ anaconda_path = '/opt/conda-py-scientific/bin/python'
+
+ _, list_a, list_b, significance = argv
+
+ p = subprocess.Popen([anaconda_path, '-', list_a, list_b],
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE)
qyearsley 2016/01/26 19:26:00 Why is it necessary to do this through anaconda? O
RobertoCN 2016/02/01 17:29:51 Added the explanation to the docstring of bisector
+ output, _ = p.communicate(input=CONDA_SCRIPT)
+ results = json.loads(output.decode())
+ if (results['shapiro_p_value'][0] < significance and
+ results['shapiro_p_value'][1] < significance):
+ results['normal-y'] = True
+ else:
+ results['normal-y'] = False
+ results['significantly_different'] = bool(
+ float(results['mann_p_value']) < float(significance))
+
+ print json.dumps(results, indent=4)
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))  # main() returns None, so the exit status is 0.
+
+

Powered by Google App Engine
This is Rietveld 408576698