| OLD | NEW |
| 1 """This file is meant to be run in an environment where scipy is available.""" | 1 """This file is meant to be run in an environment where scipy is available.""" |
| 2 import json | 2 import json |
| 3 import logging | 3 import logging |
| 4 import sys | 4 import sys |
| 5 | 5 |
| 6 try: | 6 try: |
| 7 from scipy import stats | 7 from scipy import stats |
| 8 except ImportError: | 8 except ImportError: |
| 9 def main(): | 9 def main(): |
| 10 # scipy required, see module docstring. | 10 # scipy required, see module docstring. |
| 11 logging.warning(sys.modules[__name__].__doc__) | 11 logging.warning(sys.modules[__name__].__doc__) |
| 12 return 1 | 12 return 1 |
| 13 else: | 13 else: |
| 14 | 14 |
| 15 def main(): | 15 def main(): |
| 16 if len(sys.argv) < 4: | 16 if len(sys.argv) < 3: |
| 17 return 1 | 17 return 1 |
| 18 _, list_a, list_b, significance = sys.argv[:4] | 18 _, list_a, list_b = sys.argv[:3] |
| 19 list_a = json.loads(list_a) | 19 list_a = json.loads(list_a) |
| 20 list_b = json.loads(list_b) | 20 list_b = json.loads(list_b) |
| 21 significance = float(significance) | |
| 22 | 21 |
| 23 shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1] | 22 shapiro_p_value = stats.shapiro(list_a)[1], stats.shapiro(list_b)[1] |
| 24 mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue | 23 mann_whitney_p_value = stats.mannwhitneyu(list_a, list_b).pvalue |
| 25 anderson_p_value = stats.anderson_ksamp([list_a, list_b]).significance_level | 24 anderson_p_value = stats.anderson_ksamp([list_a, list_b]).significance_level |
| 26 welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1] | 25 welch_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)[1] |
| 27 | 26 |
| 28 results = { | 27 results = { |
| 29 'first_sample': list_a, | 28 'first_sample': list_a, |
| 30 'second_sample': list_b, | 29 'second_sample': list_b, |
| 31 'shapiro_p_value': shapiro_p_value, | 30 'shapiro_p_value': shapiro_p_value, |
| 32 'mann_p_value': mann_whitney_p_value, | 31 'mann_p_value': mann_whitney_p_value, |
| 33 'anderson_p_value': anderson_p_value, | 32 'anderson_p_value': anderson_p_value, |
| 34 'welch_p_value': welch_p_value, | 33 'welch_p_value': welch_p_value, |
| 35 } | 34 } |
| 36 | 35 |
| 37 # TODO(robertocn): It seems we haven't used the results of shapiro test for | |
| 38 # normality. We should remove this along with anderson darling and welch's. | |
| 39 if (results['shapiro_p_value'][0] < significance and | |
| 40 results['shapiro_p_value'][1] < significance): | |
| 41 results['normal-y'] = True | |
| 42 else: | |
| 43 results['normal-y'] = False | |
| 44 results['significantly_different'] = bool( | |
| 45 float(results['mann_p_value']) < float(significance)) | |
| 46 | |
| 47 print json.dumps(results) | 36 print json.dumps(results) |
| 48 return 0 | 37 return 0 |
| 49 | 38 |
| 50 if __name__ == '__main__': | 39 if __name__ == '__main__': |
| 51 sys.exit(main()) | 40 sys.exit(main()) |
| OLD | NEW |