Chromium Code Reviews| Index: appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py |
| diff --git a/appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py b/appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..8eb74d04e2da3641c57ce3ab42e8170976fea8c7 |
| --- /dev/null |
| +++ b/appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py |
| @@ -0,0 +1,102 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Checks confidence level for Findit heuristic results.""" |
| +from collections import Counter |
| +from collections import defaultdict |
| +import datetime |
| +import json |
| +import os |
| +import sys |
| + |
| +_REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) |
| +sys.path.insert(1, _REMOTE_API_DIR) |
| + |
| +import remote_api |
| + |
| +from model import result_status |
| +from model.wf_analysis import WfAnalysis |
| + |
| + |
| +# Length, in days, of the query window used below; the window ends at |
| +# 00:00 UTC of the current day. |
| +DAYS = 180 |
| + |
| +# Hint scores used below as the bucket keys of the per-score tallies. |
| +# NOTE(review): presumably these mirror the scores assigned by the heuristic |
| +# analysis itself -- confirm against that code. |
| +SCORE_ADD_DELETE_FILE = 5 |
| +SCORE_CHANGE_LINE = 4 |
| +SCORE_CHANGE_FILE = 2 |
| +SCORE_CHANGE_RELATED_LINE = 1 |
|
stgao
2016/06/17 18:38:59
related file?
chanli
2016/06/17 22:01:28
Done.
|
| + |
| +# Result statuses for which an analysis is counted as correct below. |
| +CORRECT_RESULT_STATUSES = [result_status.FOUND_CORRECT, |
|
stgao
2016/06/17 18:38:59
For failures in a row on the same builder, do we i
chanli
2016/06/17 22:01:28
Done.
|
| + result_status.FOUND_CORRECT_DUPLICATE] |
| +# Result statuses for which an analysis is counted as incorrect below. |
| +INCORRECT_RESULT_STATUSES = [result_status.FOUND_INCORRECT, |
| + result_status.FOUND_INCORRECT_DUPLICATE] |
| + |
| + |
| +def _GetHighestScoreInHints(hints): |
| + """Returns the largest value in hints, or 0 if no value is greater than 0. |
| + |
| + Args: |
| + hints: An object providing itervalues() (e.g. a Python 2 dict) whose |
| + values are the scores to compare. |
| + """ |
| + # Starting from 0 means empty hints (or only non-positive scores) yield 0. |
| + result = 0 |
| + for hint_score in hints.itervalues(): |
|
stgao
2016/06/17 18:38:59
built-in function "max" should do the same thing h
lijeffrey
2016/06/17 19:43:11
+1
chanli
2016/06/17 22:01:28
Done.
|
| + result = hint_score if hint_score > result else result |
| + return result |
| + |
| + |
| +if __name__ == '__main__': |
|
stgao
2016/06/17 18:38:59
Move the logic to a function and call it from the
chanli
2016/06/17 22:01:28
Done.
|
| + # Set up the Remote API to use services on the live App Engine. |
| + remote_api.EnableRemoteApi(app_id='findit-for-me') |
| + |
| + # Query window: from DAYS days before 00:00 UTC today up to 00:00 UTC today. |
| + end_date = datetime.datetime.utcnow().replace( |
| + hour=0, minute=0, second=0, microsecond=0) |
| + start_date = end_date - datetime.timedelta(days=DAYS) |
| + |
| + # NOTE(review): this query object is replaced by the assignment further |
| + # below before it is ever used. |
| + analysis_query = WfAnalysis.query( |
|
stgao
2016/06/17 18:38:59
Unused?
chanli
2016/06/17 22:01:28
Done.
|
| + WfAnalysis.build_start_time > start_date, |
| + WfAnalysis.build_start_time <= end_date) |
| + |
| + # Select analyses whose result_status is one of the four FOUND_* values. |
| + analysis_query = WfAnalysis.query(remote_api.ndb.OR( |
| + WfAnalysis.result_status == result_status.FOUND_CORRECT, |
| + WfAnalysis.result_status == result_status.FOUND_CORRECT_DUPLICATE, |
| + WfAnalysis.result_status == result_status.FOUND_INCORRECT, |
| + WfAnalysis.result_status == result_status.FOUND_INCORRECT_DUPLICATE)) |
| + |
| + # Restrict the selection to the query window and fetch all matches. |
| + analysis_list = analysis_query.filter( |
| + WfAnalysis.build_start_time > start_date, |
| + WfAnalysis.build_start_time <= end_date).fetch() |
| + |
| + # Number of suspected CLs seen per highest hint score. |
| + total_result_number_dict = { |
| + SCORE_ADD_DELETE_FILE: 0, |
| + SCORE_CHANGE_LINE: 0, |
| + SCORE_CHANGE_FILE: 0, |
| + SCORE_CHANGE_RELATED_LINE: 0 |
| + } |
| + |
| + # Subset of the above that came from analyses counted as correct. |
| + correct_result_number_dict = { |
| + SCORE_ADD_DELETE_FILE: 0, |
| + SCORE_CHANGE_LINE: 0, |
| + SCORE_CHANGE_FILE: 0, |
| + SCORE_CHANGE_RELATED_LINE: 0 |
| + } |
| + |
| + for analysis in analysis_list: |
| + if not analysis.suspected_cls or len(analysis.suspected_cls) != 1: |
| + # Bails out if there are multiple suspected_cls |
|
lijeffrey
2016/06/17 19:43:11
nit: comment ends with . Also add bails out if the
chanli
2016/06/17 22:01:28
Done.
|
| + continue |
| + |
| + analysis_result = analysis.result |
| + # NOTE(review): correct is only assigned in these two branches; the query |
| + # above limits result_status to the four values the two lists contain. |
| + if analysis.result_status in CORRECT_RESULT_STATUSES: |
| + correct = True |
| + elif analysis.result_status in INCORRECT_RESULT_STATUSES: |
| + correct = False |
| + |
| + if analysis_result: |
| + for failure in analysis_result.get('failures', []): |
| + for suspected_cl in failure.get('suspected_cls', []): |
| + # NOTE(review): the increments below raise KeyError if the highest |
| + # score is not one of the four SCORE_* keys (e.g. 0 for empty hints). |
| + highest_score = _GetHighestScoreInHints(suspected_cl['hints']) |
|
stgao
2016/06/17 18:38:59
This is per-failure, right?
chanli
2016/06/17 22:01:28
We just consider failures with only one suspected
stgao
2016/06/17 23:39:21
In the current code, if the same culprit is respon
chanli
2016/06/17 23:57:53
Done.
|
| + total_result_number_dict[highest_score] += 1 |
| + if correct: |
| + correct_result_number_dict[highest_score] += 1 |
| + |
| + # Accuracy per score is correct / total, or None when the total is 0. |
| + accuracy_rate_dict = defaultdict(float) |
| + for score, correct_number in correct_result_number_dict.iteritems(): |
| + accuracy_rate_dict[score] = ( |
| + float(correct_number) / total_result_number_dict[score] if |
| + total_result_number_dict[score] != 0 else None) |
|
lijeffrey
2016/06/17 19:43:11
nit: I think you can just do if total_Result_numbe
chanli
2016/06/17 22:01:28
Done.
|
| + print json.dumps(accuracy_rate_dict, indent=2) |