Chromium Code Reviews| Index: scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py |
| diff --git a/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py b/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..c1c9580b099f89a1066cd9a0e698431cbf8b3dcc |
| --- /dev/null |
| +++ b/scripts/slave/recipe_modules/auto_bisect/resources/significantly_different.py |
| @@ -0,0 +1,62 @@ |
| +"""Launches an anaconda environment to run some scipy hypothesis tests.""" |
| + |
| +import json |
| +import os |
| +import subprocess |
| +import sys |
| + |
| +CONDA_SCRIPT = """ |
| +import json |
| +import sys |
| + |
| +from scipy import stats |
| + |
| +_, list_a, list_b = sys.argv |
| +list_a = json.loads(list_a) |
| +list_b = json.loads(list_b) |
| + |
| +shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1] |
| +mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue |
| +anderson_p_value = stats.anderson_ksamp([list_a, list_b]).significance_level |
| +welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1] |
| + |
| +results = { |
| + 'first_sample': list_a, |
| + 'second_sample': list_b, |
| + 'shapiro_p_value': shapiro_p_value, |
| + 'mann_p_value': mann_whitney_p_value, |
| + 'anderson_p_value': mann_whitney_p_value, |
| + 'welch_p_value': welch_p_value, |
| +} |
| + |
| +print json.dumps(results) |
| +sys.exit(0) |
| +""" |
| + |
| +def main(argv, anaconda_path=None): |
| + if not anaconda_path: |
| + if os.name == 'nt': |
| + anaconda_path = r'c:\conda-py-scientific\bin\python' |
| + else: |
| + anaconda_path = '/opt/conda-py-scientific/bin/python' |
| + |
| + _, list_a, list_b, significance = argv |
| + |
| + p = subprocess.Popen([anaconda_path, '-', list_a, list_b], |
| + stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
|
qyearsley
2016/01/26 19:26:00
Why is it necessary to do this through anaconda? O
RobertoCN
2016/02/01 17:29:51
Added the explanation to the docstring of bisector
|
| + output, _ = p.communicate(input=CONDA_SCRIPT) |
| + results = json.loads(output.decode()) |
| + if (results['shapiro_p_value'][0] < significance and |
| + results['shapiro_p_value'][1] < significance): |
| + results['normal-y'] = True |
| + else: |
| + results['normal-y'] = False |
| + results['significantly_different'] = bool( |
| + float(results['mann_p_value']) < float(significance)) |
| + |
| + print json.dumps(results, indent=4) |
| + |
| +# Script entry point: the value returned by main() is passed to sys.exit(). |
| +if __name__ == '__main__': |
| + sys.exit(main(sys.argv)) |
| + |
| + |