appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py - Issue 2072893002: [Findit] scripts to calculate confidence level of Findit results.

Unified Diff: appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py

Issue 2072893002: [Findit] scripts to calculate confidence level of Findit results. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Merge code and add data model. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py

diff --git a/appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py b/appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py

new file mode 100644

index 0000000000000000000000000000000000000000..61ee7f0b57ea412511c603688d1541d8141e8767

--- /dev/null

+++ b/appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py

@@ -0,0 +1,369 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Calculates confidence scores for all suspected CLs so far and saves the

+ scores to the data store.

+"""

+import argparse

+from collections import defaultdict

+import copy

+import datetime

+import json

+import os

+import sys

+_REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir)

+sys.path.insert(1, _REMOTE_API_DIR)

+import remote_api

+from google.appengine.ext import ndb

+from common.waterfall import failure_type

+from model import analysis_approach_type

+from model import suspected_cl_status

+from model.cl_confidence import CLConfidence

+from model.wf_suspected_cl import WfSuspectedCL

# Statuses accepted by the suspected-CL queries in this script; they are
# applied through WfSuspectedCL.status.IN(TRIAGED_STATUS).
TRIAGED_STATUS = [
    suspected_cl_status.CORRECT,
    suspected_cl_status.INCORRECT,
    suspected_cl_status.PARTIALLY_CORRECT,
    suspected_cl_status.PARTIALLY_TRIAGED,
]

def _CalculateConfidenceLevelsForHeuristic(
    original_cl_confidence, cl_failure_type, new_results):
  """Merges newly counted heuristic results into the stored per-score tallies.

  Args:
    original_cl_confidence: The previously saved confidence entity, or None.
      Its compile_heuristic attribute is read when cl_failure_type is
      failure_type.COMPILE, otherwise its test_heuristic attribute is read.
    cl_failure_type: The failure type whose tallies should be updated.
    new_results: A dict mapping a score to a dict of counts keyed by
      suspected_cl_status values. Only the CORRECT and INCORRECT counts are
      used; a missing count is treated as 0.

  Returns:
    A new dict mapping each score to a dict with the keys 'correct', 'total'
    and 'confidence', where confidence is correct / total (0.0 when total is
    0). Neither the stored tallies nor new_results are modified.
  """
  original_results = None
  if original_cl_confidence:
    original_results = (
        original_cl_confidence.compile_heuristic
        if cl_failure_type == failure_type.COMPILE else
        original_cl_confidence.test_heuristic)

  # Work on a copy so the previously saved entity is never changed in place.
  # NOTE(review): if the stored tallies were round-tripped through JSON their
  # score keys may be strings while new_results uses the raw scores - confirm.
  updated_results = copy.deepcopy(original_results) if original_results else {}

  for score, counts in new_results.iteritems():
    counts = counts or {}
    # .get() avoids inserting keys into a defaultdict argument and tolerates
    # plain dicts that lack one of the two statuses.
    correct_number = counts.get(suspected_cl_status.CORRECT, 0)
    incorrect_number = counts.get(suspected_cl_status.INCORRECT, 0)

    tally = updated_results.setdefault(
        score, {'correct': 0, 'total': 0, 'confidence': 0.0})
    tally['correct'] += correct_number
    tally['total'] += correct_number + incorrect_number

  # Recompute the ratio for every score, including those that only exist in
  # the stored tallies.
  for tally in updated_results.itervalues():
    # NOTE(review): code review suggested -1 instead of 0.0 as an "N/A" marker
    # for scores without any samples; 0.0 is kept here.
    tally['confidence'] = (
        float(tally['correct']) / tally['total'] if tally['total'] else 0.0)

  return updated_results

def _CalculateConfidenceLevelsForTryJob(
    original_cl_confidence, cl_failure_type, approach, new_result):
  """Merges newly counted try-job results into the stored tally.

  Args:
    original_cl_confidence: The previously saved confidence entity, or None.
    cl_failure_type: failure_type.COMPILE selects the compile_* attributes of
      original_cl_confidence; any other value selects the test_* attributes.
    approach: 'try_job' selects the *_try_job attribute; any other value
      selects the *_heuristic_try_job attribute.
    new_result: A dict of counts keyed by suspected_cl_status values. Only the
      CORRECT and INCORRECT counts are used; a missing count is treated as 0.

  Returns:
    A new dict with the keys 'correct', 'total' and 'confidence', where
    confidence is correct / total (0.0 when total is 0). Neither the stored
    tally nor new_result is modified.
  """
  original_result = None
  if original_cl_confidence:
    if cl_failure_type == failure_type.COMPILE:
      original_result = (
          original_cl_confidence.compile_try_job if approach == 'try_job' else
          original_cl_confidence.compile_heuristic_try_job)
    else:
      original_result = (
          original_cl_confidence.test_try_job if approach == 'try_job' else
          original_cl_confidence.test_heuristic_try_job)

  # Start from a copy of the stored tally, or from an empty one when nothing
  # has been saved yet, so a single code path handles both cases.
  if original_result:
    updated_result = copy.deepcopy(original_result)
  else:
    updated_result = {'correct': 0, 'total': 0, 'confidence': 0.0}

  new_result = new_result or {}
  # .get() avoids inserting keys into a defaultdict argument and tolerates
  # plain dicts that lack one of the two statuses.
  correct_number = new_result.get(suspected_cl_status.CORRECT, 0)
  incorrect_number = new_result.get(suspected_cl_status.INCORRECT, 0)

  updated_result['correct'] += correct_number
  updated_result['total'] += correct_number + incorrect_number
  updated_result['confidence'] = (
      float(updated_result['correct']) / updated_result['total']
      if updated_result['total'] else 0.0)

  return updated_result

def _GetMostRecentCLConfidence():
  """Returns the CLConfidence entity with the latest end_date, or None."""
  # NOTE(review): code review pointed out that this query result is not
  # cached.
  newest_first = CLConfidence.query().order(-CLConfidence.end_date)
  latest = newest_first.fetch(1)
  if not latest:
    return None
  return latest[0]

@ndb.transactional
def _SavesNewCLConfidence(
    _last_cl_confidence, date_start, date_end,
    result_heuristic, result_try_job, result_both):
  """Builds a CLConfidence entity for the date range and stores it.

  Args:
    _last_cl_confidence: The most recently saved confidence entity, or None;
      its tallies are merged with the new counts. (The leading underscore was
      called a typo in code review; the name is kept for compatibility.)
    date_start: Value stored as the new entity's start_date.
    date_end: Value stored as the new entity's end_date.
    result_heuristic: Heuristic counts, indexed by failure type.
    result_try_job: Try-job counts, indexed by failure type.
    result_both: Counts for CLs found by both approaches, indexed by failure
      type.

  Returns:
    The CLConfidence entity after put() has been called on it.
  """
  compile_failure = failure_type.COMPILE
  test_failure = failure_type.TEST
  previous = _last_cl_confidence

  entity = CLConfidence()
  entity.start_date = date_start
  entity.end_date = date_end

  # Heuristic-only tallies, bucketed by score.
  entity.compile_heuristic = _CalculateConfidenceLevelsForHeuristic(
      previous, compile_failure, result_heuristic[compile_failure])
  entity.test_heuristic = _CalculateConfidenceLevelsForHeuristic(
      previous, test_failure, result_heuristic[test_failure])

  # Try-job tallies.
  entity.compile_try_job = _CalculateConfidenceLevelsForTryJob(
      previous, compile_failure, 'try_job', result_try_job[compile_failure])
  entity.test_try_job = _CalculateConfidenceLevelsForTryJob(
      previous, test_failure, 'try_job', result_try_job[test_failure])

  # Tallies for CLs found by both heuristic analysis and try job.
  entity.compile_heuristic_try_job = _CalculateConfidenceLevelsForTryJob(
      previous, compile_failure, 'both', result_both[compile_failure])
  entity.test_heuristic_try_job = _CalculateConfidenceLevelsForTryJob(
      previous, test_failure, 'both', result_both[test_failure])

  entity.put()
  return entity

def _AddMoreConstrainsToQuery(query, failure_args, date_start, date_end):
  """Narrows a WfSuspectedCL query by failure type and update time.

  Args:
    query: The query to extend.
    failure_args: A container of command line flags; 'c' restricts the query
      to compile failures, otherwise 't' restricts it to test failures.
    date_start: Inclusive lower bound for updated_time; skipped when falsy.
    date_end: Exclusive upper bound for updated_time.

  Returns:
    The query with the additional filters applied.
  """
  # The failure-type constants must be matched against the failure_type
  # property: the callers in this file already constrain `status` to
  # TRIAGED_STATUS, so comparing `status` with a failure type conflicts with
  # that filter.
  # NOTE(review): assumes WfSuspectedCL exposes an indexed `failure_type`
  # property - confirm against model/wf_suspected_cl.py.
  if 'c' in failure_args:
    query = query.filter(WfSuspectedCL.failure_type == failure_type.COMPILE)
  elif 't' in failure_args:
    query = query.filter(WfSuspectedCL.failure_type == failure_type.TEST)

  if date_start:
    query = query.filter(WfSuspectedCL.updated_time >= date_start)

  return query.filter(WfSuspectedCL.updated_time < date_end)

def _GetCLDataForHeuristic(failure_args, date_start, date_end):
  """Counts heuristic results by failure type, top score and status.

  Args:
    failure_args: Command line flags forwarded to _AddMoreConstrainsToQuery.
    date_start: Lower bound forwarded to _AddMoreConstrainsToQuery.
    date_end: Upper bound forwarded to _AddMoreConstrainsToQuery.

  Returns:
    A nested defaultdict indexed as [failure_type][top_score][status] whose
    leaves are the number of counted builds.
  """
  base_query = WfSuspectedCL.query(remote_api.ndb.AND(
      WfSuspectedCL.status.IN(TRIAGED_STATUS),
      WfSuspectedCL.approaches == analysis_approach_type.HEURISTIC))
  constrained_query = _AddMoreConstrainsToQuery(
      base_query, failure_args, date_start, date_end)

  counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

  for suspected_cl in constrained_query.fetch():
    if not suspected_cl.builds:
      continue

    # Failures already counted for this CL, so that builds reporting the same
    # failures are only counted once.
    seen_failures = []
    for build in suspected_cl.builds.values():
      # NOTE(review): code review questioned the need for this per-build
      # approach check; it skips builds whose approaches are try job only.
      if build['approaches'] == [analysis_approach_type.TRY_JOB]:
        continue
      if build['failures'] in seen_failures:
        continue
      if not build['top_score'] or build['status'] is None:
        continue

      seen_failures.append(build['failures'])
      failure = build['failure_type']
      counts[failure][build['top_score']][build['status']] += 1

  return counts

def _GetCLDataForTryJob(failure_args, date_start, date_end):
  """Counts try-job results by failure type and status.

  Args:
    failure_args: Command line flags forwarded to _AddMoreConstrainsToQuery.
    date_start: Lower bound forwarded to _AddMoreConstrainsToQuery.
    date_end: Upper bound forwarded to _AddMoreConstrainsToQuery.

  Returns:
    A tuple (try_job_counts, both_counts) of nested defaultdicts indexed as
    [failure_type][status]. The first counts every build that is not
    heuristic-only; the second counts the subset of those builds whose
    approaches also include heuristic.
  """
  base_query = WfSuspectedCL.query(remote_api.ndb.AND(
      WfSuspectedCL.status.IN(TRIAGED_STATUS),
      WfSuspectedCL.approaches == analysis_approach_type.TRY_JOB))
  constrained_query = _AddMoreConstrainsToQuery(
      base_query, failure_args, date_start, date_end)

  try_job_counts = defaultdict(lambda: defaultdict(int))
  both_counts = defaultdict(lambda: defaultdict(int))

  for suspected_cl in constrained_query.fetch():
    if not suspected_cl.builds:
      continue

    # Failures already counted for this CL, so that builds reporting the same
    # failures are only counted once.
    seen_failures = []
    for build in suspected_cl.builds.values():
      if build['approaches'] == [analysis_approach_type.HEURISTIC]:
        continue
      if build['failures'] in seen_failures:
        continue

      seen_failures.append(build['failures'])
      try_job_counts[build['failure_type']][build['status']] += 1

      if analysis_approach_type.HEURISTIC in build['approaches']:
        # Both heuristic and try job found this CL on this build.
        both_counts[build['failure_type']][build['status']] += 1

  return try_job_counts, both_counts

+def _PrintResult(

+ date_start, date_end, result_heuristic, result_try_job, result_both):

+ print 'Start Date: ', date_start

+ print 'End Date: ', date_end

+ print '--------------------------------------------------------------------'

+ if result_heuristic:

+ print 'compile_heuristic'

+ print json.dumps(result_heuristic.get(failure_type.COMPILE), indent=2)

+ print

+ print 'test_heuristic'

+ print json.dumps(result_heuristic.get(failure_type.TEST), indent=2)

+ print

+ if result_try_job:

+ print 'compile_try_job'

+ print json.dumps(result_try_job.get(failure_type.COMPILE), indent=2)

+ print

+ print 'test_try_job'

+ print json.dumps(result_try_job.get(failure_type.TEST), indent=2)

+ print

+ if result_both:

+ print 'compile_heuristic_try_job'

+ print json.dumps(result_both.get(failure_type.COMPILE), indent=2)

+ print

+ print 'test_heuristic_try_job'

+ print json.dumps(result_both.get(failure_type.TEST), indent=2)

+ print

def _ValidDate(date_str):
  """Parses a YYYY-MM-DD command line value into a datetime.

  Args:
    date_str: The raw string given on the command line.

  Returns:
    A datetime.datetime at midnight of the given day.

  Raises:
    argparse.ArgumentTypeError: If date_str is not in YYYY-MM-DD format.
  """
  try:
    return datetime.datetime.strptime(date_str, '%Y-%m-%d')
  except ValueError:
    # Name the offending value and the expected format so the argparse usage
    # error tells the user how to fix the command line.
    raise argparse.ArgumentTypeError(
        'Invalid date %r: expected format YYYY-MM-DD.' % date_str)

def _GetArguments(argv=None):
  """Parses the command line and returns only the options that were set.

  Args:
    argv: Optional list of argument strings to parse instead of sys.argv[1:];
      mainly useful for calling this function programmatically.

  Returns:
    A dict mapping option names ('c', 't', 'r', 'j', 's', 'e') to their
    values, containing only the options whose parsed value is truthy.
  """
  parser = argparse.ArgumentParser()

  # Uses group to make -c|-t are exclusive from each other, because if both
  # arguments are there, it means query everything.
  # Same for -r|-j.
  failure_group = parser.add_mutually_exclusive_group()
  failure_group.add_argument(
      '-c', action='store_true',
      help='get confidence score for compile failures.')
  failure_group.add_argument(
      '-t', action='store_true',
      help='get confidence score for test failures.')

  approach_group = parser.add_mutually_exclusive_group()
  # Uses -r for heuristic failures because -h is already used for help.
  approach_group.add_argument(
      '-r', action='store_true',
      help='get confidence score for heuristic failures.')
  # Uses -j for try job failures because -t is already used for test failures.
  approach_group.add_argument(
      '-j', action='store_true',
      help='get confidence score for try job failures.')

  parser.add_argument(
      '-s', help='The Start Date - format YYYY-MM-DD', type=_ValidDate)
  parser.add_argument(
      '-e', help='The End Date - format YYYY-MM-DD', type=_ValidDate)

  # parse_args(None) falls back to sys.argv[1:], so existing callers that pass
  # nothing keep the original behavior.
  parsed = vars(parser.parse_args(argv))

  # Callers test for flags with `'x' in args`, so unset options are dropped.
  return {name: value for name, value in parsed.iteritems() if value}

if __name__ == '__main__':
  # Set up the Remote API to use services on the live App Engine.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  args = _GetArguments()

  # Without -e the window ends at the start of the current UTC day.
  # NOTE(review): code review suggested using time_util for this.
  utc_midnight = datetime.datetime.utcnow().replace(
      hour=0, minute=0, second=0, microsecond=0)
  end_date = args.get('e', utc_midnight)

  # Without -s the window starts where the last saved calculation ended, or
  # is unbounded when nothing has been saved yet.
  last_cl_confidence = _GetMostRecentCLConfidence()
  start_date = args.get('s')
  if not start_date and last_cl_confidence:
    start_date = last_cl_confidence.end_date

  heuristic_result = None
  try_job_result = None
  both_result = None

  # -r and -j are mutually exclusive: -r limits the run to heuristic results,
  # -j limits it to try job results, and neither means both are calculated.
  if 'j' not in args:
    heuristic_result = _GetCLDataForHeuristic(args, start_date, end_date)
  if 'r' not in args:
    try_job_result, both_result = _GetCLDataForTryJob(
        args, start_date, end_date)

  if args:
    _PrintResult(
        start_date, end_date, heuristic_result, try_job_result, both_result)
  else:
    # Saves new confidence score for full calculation only, then prints the
    # saved tallies instead of the raw counts of this run.
    cl_confidence = _SavesNewCLConfidence(
        last_cl_confidence, start_date, end_date, heuristic_result,
        try_job_result, both_result)

    heuristic_result = {
        failure_type.COMPILE: cl_confidence.compile_heuristic,
        failure_type.TEST: cl_confidence.test_heuristic
    }
    try_job_result = {
        failure_type.COMPILE: cl_confidence.compile_try_job,
        failure_type.TEST: cl_confidence.test_try_job
    }
    both_result = {
        failure_type.COMPILE: cl_confidence.compile_heuristic_try_job,
        failure_type.TEST: cl_confidence.test_heuristic_try_job
    }
    _PrintResult(
        None, end_date, heuristic_result, try_job_result, both_result)

« appengine/findit/model/cl_confidence.py ('K') | « appengine/findit/model/cl_confidence.py ('k') | no next file » | no next file with comments »