appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py - Issue 2072893002: [Findit] scripts to calculate confidence level of Findit results.

Side by Side Diff: appengine/findit/util_scripts/remote_queries/check_confidence_level_heuristic.py

Issue 2072893002: [Findit] scripts to calculate confidence level of Findit results. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: . Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « no previous file | appengine/findit/util_scripts/remote_queries/check_confidence_level_try_job.py » ('j') | appengine/findit/util_scripts/remote_queries/check_confidence_level_try_job.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """Checks confidence level for Findit heuristic results."""

	6 from collections import Counter

	7 from collections import defaultdict

	8 import datetime

	9 import json

	10 import os

	11 import sys

	12

	13 _REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)

	14 sys.path.insert(1, _REMOTE_API_DIR)

	15

	16 import remote_api

	17

	18 from model import result_status

	19 from model.wf_analysis import WfAnalysis

	20

	21

	22 DAYS = 180

	23

	24 SCORE_ADD_DELETE_FILE = 5

	25 SCORE_CHANGE_LINE = 4

	26 SCORE_CHANGE_FILE = 2

	27 SCORE_CHANGE_RELATED_LINE = 1
	stgao 2016/06/17 18:38:59: related file?
	chanli 2016/06/17 22:01:28: On 2016/06/17 18:38:59, stgao wrote: > related file? Done.
	28

	29 CORRECT_RESULT_STATUSES = [result_status.FOUND_CORRECT,
	stgao 2016/06/17 18:38:59: For failures in a row on the same builder, do we include only the first one or all of them? Should we deduplicate?
	chanli 2016/06/17 22:01:28: On 2016/06/17 18:38:59, stgao wrote: > For failures in a row on the same builder, do we include only the first one or > all of them? Should we deduplicate? Done.
	30 result_status.FOUND_CORRECT_DUPLICATE]

	31 INCORRECT_RESULT_STATUSES = [result_status.FOUND_INCORRECT,

	32 result_status.FOUND_INCORRECT_DUPLICATE]

	33

	34

	35 def _GetHighestScoreInHints(hints):

	36 result = 0

	37 for hint_score in hints.itervalues():
	stgao 2016/06/17 18:38:59: built-in function "max" should do the same thing here.
	lijeffrey 2016/06/17 19:43:11: On 2016/06/17 18:38:59, stgao wrote: > built-in function "max" should do the same thing here. +1
	chanli 2016/06/17 22:01:28: On 2016/06/17 19:43:11, lijeffrey wrote: > On 2016/06/17 18:38:59, stgao wrote: > > built-in function "max" should do the same thing here. > > +1 Done.
	38 result = hint_score if hint_score > result else result

	39 return result

	40

	41

	42 if __name__ == '__main__':
	stgao 2016/06/17 18:38:59: Move the logic to a function and call it from the __main__.
	chanli 2016/06/17 22:01:28: On 2016/06/17 18:38:59, stgao wrote: > Move the logic to a function and call it from the __main__. Done.
	43 # Set up the Remote API to use services on the live App Engine.

	44 remote_api.EnableRemoteApi(app_id='findit-for-me')

	45

	46 end_date = datetime.datetime.utcnow().replace(

	47 hour=0, minute=0, second=0, microsecond=0)

	48 start_date = end_date - datetime.timedelta(days=DAYS)

	49

	50 analysis_query = WfAnalysis.query(
	stgao 2016/06/17 18:38:59: Unused?
	chanli 2016/06/17 22:01:28: On 2016/06/17 18:38:59, stgao wrote: > Unused? Done.
	51 WfAnalysis.build_start_time > start_date,

	52 WfAnalysis.build_start_time <= end_date)

	53

	54 analysis_query = WfAnalysis.query(remote_api.ndb.OR(

	55 WfAnalysis.result_status == result_status.FOUND_CORRECT,

	56 WfAnalysis.result_status == result_status.FOUND_CORRECT_DUPLICATE,

	57 WfAnalysis.result_status == result_status.FOUND_INCORRECT,

	58 WfAnalysis.result_status == result_status.FOUND_INCORRECT_DUPLICATE))

	59

	60 analysis_list = analysis_query.filter(

	61 WfAnalysis.build_start_time > start_date,

	62 WfAnalysis.build_start_time <= end_date).fetch()

	63

	64 total_result_number_dict = {

	65 SCORE_ADD_DELETE_FILE: 0,

	66 SCORE_CHANGE_LINE: 0,

	67 SCORE_CHANGE_FILE: 0,

	68 SCORE_CHANGE_RELATED_LINE: 0

	69 }

	70

	71 correct_result_number_dict = {

	72 SCORE_ADD_DELETE_FILE: 0,

	73 SCORE_CHANGE_LINE: 0,

	74 SCORE_CHANGE_FILE: 0,

	75 SCORE_CHANGE_RELATED_LINE: 0

	76 }

	77

	78 for analysis in analysis_list:

	79 if not analysis.suspected_cls or len(analysis.suspected_cls) != 1:

	80 # Bails out if there are multiple suspected_cls
	lijeffrey 2016/06/17 19:43:11: nit: comment ends with "." Also add bails out if there are no suspected_cls too
	chanli 2016/06/17 22:01:28: On 2016/06/17 19:43:11, lijeffrey wrote: > nit: comment ends with . Also add bails out if there are no suspected_cls too Done.
	81 continue

	82

	83 analysis_result = analysis.result

	84 if analysis.result_status in CORRECT_RESULT_STATUSES:

	85 correct = True

	86 elif analysis.result_status in INCORRECT_RESULT_STATUSES:

	87 correct = False

	88

	89 if analysis_result:

	90 for failure in analysis_result.get('failures', []):

	91 for suspected_cl in failure.get('suspected_cls', []):

	92 highest_score = _GetHighestScoreInHints(suspected_cl['hints'])
	stgao 2016/06/17 18:38:59: This is per-failure, right?
	chanli 2016/06/17 22:01:28: On 2016/06/17 18:38:59, stgao wrote: > This is per-failure, right? We just consider failures with only one suspected cl.
	stgao 2016/06/17 23:39:21: On 2016/06/17 22:01:28, chanli wrote: > On 2016/06/17 18:38:59, stgao wrote: > > This is per-failure, right? > > We just consider failures with only one suspected cl. In the current code, if the same culprit is responsible for 2+ failures in the same build, it will be counted 2+ times for the total result number and correct result number. Is that expected? Should we do per-build instead?
	chanli 2016/06/17 23:57:53: On 2016/06/17 23:39:21, stgao wrote: > On 2016/06/17 22:01:28, chanli wrote: > > On 2016/06/17 18:38:59, stgao wrote: > > > This is per-failure, right? > > > > We just consider failures with only one suspected cl. > > In the current code, if the same culprit is responsible for 2+ failures in the > same build, it will be counted 2+ times for the total result number and correct > result number. > > Is that expected? Should we do per-build instead? Done.
	93 total_result_number_dict[highest_score] += 1

	94 if correct:

	95 correct_result_number_dict[highest_score] += 1

	96

	97 accuracy_rate_dict = defaultdict(float)

	98 for score, correct_number in correct_result_number_dict.iteritems():

	99 accuracy_rate_dict[score] = (

	100 float(correct_number) / total_result_number_dict[score] if

	101 total_result_number_dict[score] != 0 else None)
	lijeffrey 2016/06/17 19:43:11: nit: I think you can just do if total_result_number_dict[score] else None
	chanli 2016/06/17 22:01:28: On 2016/06/17 19:43:11, lijeffrey wrote: > nit: I think you can just do if total_Result_number_dict[score] else None Done.
	102 print json.dumps(accuracy_rate_dict, indent=2)

OLD	NEW