Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(371)

Unified Diff: appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py

Issue 2072893002: [Findit] scripts to calculate confidence level of Findit results. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: . Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py
diff --git a/appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py b/appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py
new file mode 100644
index 0000000000000000000000000000000000000000..8eb74d04e2da3641c57ce3ab42e8170976fea8c7
--- /dev/null
+++ b/appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py
@@ -0,0 +1,102 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Checks confidence level for Findit heuristic results."""
+from collections import Counter
+from collections import defaultdict
+import datetime
+import json
+import os
+import sys
+
+_REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)
+sys.path.insert(1, _REMOTE_API_DIR)
+
+import remote_api
+
+from model import result_status
+from model.wf_analysis import WfAnalysis
+
+
+DAYS = 180
+
+SCORE_ADD_DELETE_FILE = 5
+SCORE_CHANGE_LINE = 4
+SCORE_CHANGE_FILE = 2
+SCORE_CHANGE_RELATED_LINE = 1
stgao 2016/06/17 18:38:59 related file?
chanli 2016/06/17 22:01:28 Done.
+
+CORRECT_RESULT_STATUSES = [result_status.FOUND_CORRECT,
stgao 2016/06/17 18:38:59 For failures in a row on the same builder, do we i
chanli 2016/06/17 22:01:28 Done.
+ result_status.FOUND_CORRECT_DUPLICATE]
+INCORRECT_RESULT_STATUSES = [result_status.FOUND_INCORRECT,
+ result_status.FOUND_INCORRECT_DUPLICATE]
+
+
+def _GetHighestScoreInHints(hints):
+ result = 0
+ for hint_score in hints.itervalues():
stgao 2016/06/17 18:38:59 built-in function "max" should do the same thing h
lijeffrey 2016/06/17 19:43:11 +1
chanli 2016/06/17 22:01:28 Done.
+ result = hint_score if hint_score > result else result
+ return result
+
+
+if __name__ == '__main__':
stgao 2016/06/17 18:38:59 Move the logic to a function and call it from the
chanli 2016/06/17 22:01:28 Done.
+ # Set up the Remote API to use services on the live App Engine.
+ remote_api.EnableRemoteApi(app_id='findit-for-me')
+
+ end_date = datetime.datetime.utcnow().replace(
+ hour=0, minute=0, second=0, microsecond=0)
+ start_date = end_date - datetime.timedelta(days=DAYS)
+
+ analysis_query = WfAnalysis.query(
stgao 2016/06/17 18:38:59 Unused?
chanli 2016/06/17 22:01:28 Done.
+ WfAnalysis.build_start_time > start_date,
+ WfAnalysis.build_start_time <= end_date)
+
+ analysis_query = WfAnalysis.query(remote_api.ndb.OR(
+ WfAnalysis.result_status == result_status.FOUND_CORRECT,
+ WfAnalysis.result_status == result_status.FOUND_CORRECT_DUPLICATE,
+ WfAnalysis.result_status == result_status.FOUND_INCORRECT,
+ WfAnalysis.result_status == result_status.FOUND_INCORRECT_DUPLICATE))
+
+ analysis_list = analysis_query.filter(
+ WfAnalysis.build_start_time > start_date,
+ WfAnalysis.build_start_time <= end_date).fetch()
+
+ total_result_number_dict = {
+ SCORE_ADD_DELETE_FILE: 0,
+ SCORE_CHANGE_LINE: 0,
+ SCORE_CHANGE_FILE: 0,
+ SCORE_CHANGE_RELATED_LINE: 0
+ }
+
+ correct_result_number_dict = {
+ SCORE_ADD_DELETE_FILE: 0,
+ SCORE_CHANGE_LINE: 0,
+ SCORE_CHANGE_FILE: 0,
+ SCORE_CHANGE_RELATED_LINE: 0
+ }
+
+ for analysis in analysis_list:
+ if not analysis.suspected_cls or len(analysis.suspected_cls) != 1:
+ # Bails out if there are no suspected_cls or more than one.
lijeffrey 2016/06/17 19:43:11 nit: comment ends with . Also add bails out if the
chanli 2016/06/17 22:01:28 Done.
+ continue
+
+ analysis_result = analysis.result
+ if analysis.result_status in CORRECT_RESULT_STATUSES:
+ correct = True
+ elif analysis.result_status in INCORRECT_RESULT_STATUSES:
+ correct = False
+
+ if analysis_result:
+ for failure in analysis_result.get('failures', []):
+ for suspected_cl in failure.get('suspected_cls', []):
+ highest_score = _GetHighestScoreInHints(suspected_cl['hints'])
stgao 2016/06/17 18:38:59 This is per-failure, right?
chanli 2016/06/17 22:01:28 We just consider failures with only one suspected
stgao 2016/06/17 23:39:21 In the current code, if the same culprit is respon
chanli 2016/06/17 23:57:53 Done.
+ total_result_number_dict[highest_score] += 1
+ if correct:
+ correct_result_number_dict[highest_score] += 1
+
+ accuracy_rate_dict = defaultdict(float)
+ for score, correct_number in correct_result_number_dict.iteritems():
+ accuracy_rate_dict[score] = (
+ float(correct_number) / total_result_number_dict[score] if
+ total_result_number_dict[score] != 0 else None)
lijeffrey 2016/06/17 19:43:11 nit: I think you can just do if total_Result_numbe
chanli 2016/06/17 22:01:28 Done.
+ print json.dumps(accuracy_rate_dict, indent=2)

Powered by Google App Engine
This is Rietveld 408576698