"""This file is meant to be run in an environment where scipy is available."""
import json
import logging
import sys

try:
    from scipy import stats
except ImportError:
    def main():
        """Fallback entry point used when scipy cannot be imported.

        Logs the module docstring as a warning and returns exit status 1.
        """
        # scipy required, see module docstring.
        logging.warning(sys.modules[__name__].__doc__)
        return 1
else:

    def main():
        """Compare two samples given on the command line and print p-values.

        Reads ``sys.argv[1]`` and ``sys.argv[2]`` as JSON documents (expected
        to be lists of numbers); any further arguments are ignored.  Prints a
        single JSON object on stdout with these keys:

          first_sample, second_sample: the decoded inputs, echoed back.
          shapiro_p_value: pair of Shapiro-Wilk p-values, one per sample.
          mann_p_value: Mann-Whitney U p-value.
          anderson_p_value: k-sample Anderson-Darling significance level.
          welch_p_value: p-value of the t-test with ``equal_var=False``.

        Returns:
          1 if fewer than two sample arguments were supplied, 0 otherwise.
        """
        if len(sys.argv) < 3:
            return 1
        _, list_a, list_b = sys.argv[:3]
        list_a = json.loads(list_a)
        list_b = json.loads(list_b)

        # Coerce every statistic to a builtin float so json.dumps never has to
        # deal with whatever scalar type the installed scipy returns.
        shapiro_p_value = (
            float(stats.shapiro(list_a)[1]),
            float(stats.shapiro(list_b)[1]),
        )
        mann_whitney_p_value = float(stats.mannwhitneyu(list_a, list_b).pvalue)
        anderson_p_value = float(
            stats.anderson_ksamp([list_a, list_b]).significance_level)
        welch_p_value = float(
            stats.ttest_ind(list_a, list_b, equal_var=False)[1])

        results = {
            'first_sample': list_a,
            'second_sample': list_b,
            'shapiro_p_value': shapiro_p_value,
            'mann_p_value': mann_whitney_p_value,
            'anderson_p_value': anderson_p_value,
            'welch_p_value': welch_p_value,
        }

        # Single-argument call form: prints the same text under Python 2 and
        # is also valid syntax under Python 3.
        print(json.dumps(results))
        return 0


if __name__ == '__main__':
    sys.exit(main())