Chromium Code Reviews| Index: appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py |
| diff --git a/appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py b/appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..61ee7f0b57ea412511c603688d1541d8141e8767 |
| --- /dev/null |
| +++ b/appengine/findit/util_scripts/remote_queries/calculate_confidence_scores.py |
| @@ -0,0 +1,369 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Calculates confidence scores for all suspected CLs so far and saves the |
| +scores to the data store. |
| +""" |
| + |
| +import argparse |
| +from collections import defaultdict |
| +import copy |
| +import datetime |
| +import json |
| +import os |
| +import sys |
| + |
| +_REMOTE_API_DIR = os.path.join(os.path.dirname(__file__), os.path.pardir) |
| +sys.path.insert(1, _REMOTE_API_DIR) |
| + |
| +import remote_api |
| +from google.appengine.ext import ndb |
| + |
| +from common.waterfall import failure_type |
| +from model import analysis_approach_type |
| +from model import suspected_cl_status |
| +from model.cl_confidence import CLConfidence |
| +from model.wf_suspected_cl import WfSuspectedCL |
| + |
| + |
| +TRIAGED_STATUS = [ |
| + suspected_cl_status.CORRECT, |
| + suspected_cl_status.INCORRECT, |
| + suspected_cl_status.PARTIALLY_CORRECT, |
| + suspected_cl_status.PARTIALLY_TRIAGED |
| +] |
| + |
| + |
| +def _CalculateConfidenceLevelsForHeuristic( |
| + original_cl_confidence, cl_failure_type, new_results): |
| + updated_results = {} |
| + original_results = None |
| + if original_cl_confidence: |
| + original_results = ( |
| + original_cl_confidence.compile_heuristic if |
| + cl_failure_type == failure_type.COMPILE else |
| + original_cl_confidence.test_heuristic) |
| + |
| + if not original_cl_confidence or not original_results: |
| + for score, result in new_results.iteritems(): |
| + correct_number = result[suspected_cl_status.CORRECT] |
| + incorrect_number = result[suspected_cl_status.INCORRECT] |
| + total_number = correct_number + incorrect_number |
| + confidence = float(correct_number) / total_number if total_number else 0.0 |
| + |
| + updated_results[score] = { |
| + 'correct': correct_number, |
| + 'total': total_number, |
| + 'confidence': confidence |
| + } |
| + else: |
| + updated_results = copy.deepcopy(original_results) |
| + |
| + new_scores = list(set(new_results.keys()) - set(original_results.keys())) |
| + if new_scores: |
| + for new_score in new_scores: |
| + updated_results[new_score] = { |
| + 'correct': 0, |
| + 'total': 0, |
| + 'confidence': 0.0 |
| + } |
| + |
| + for score, result in updated_results.iteritems(): |
| + new_result = new_results.get(score) |
| + if not new_result: |
| + new_result = { |
| + suspected_cl_status.CORRECT: 0, |
| + suspected_cl_status.INCORRECT: 0 |
| + } |
| + |
| + result['correct'] += new_result[suspected_cl_status.CORRECT] |
| + result['total'] += (new_result[suspected_cl_status.CORRECT] + |
| + new_result[suspected_cl_status.INCORRECT]) |
| + result['confidence'] = ( |
| + float(result['correct']) / result['total'] if result['total'] else 0.0 |
|
stgao
2016/10/06 01:57:59
Instead of 0.0, should we use -1 to indicate N/A?
chanli
2016/10/07 00:01:12
Done.
|
| + ) |
| + |
| + return updated_results |
| + |
| + |
| +def _CalculateConfidenceLevelsForTryJob( |
| + original_cl_confidence, cl_failure_type, approach, new_result): |
| + |
| + original_result = None |
| + if original_cl_confidence: |
| + if cl_failure_type == failure_type.COMPILE: |
| + original_result = ( |
| + original_cl_confidence.compile_try_job if approach == 'try_job' else |
| + original_cl_confidence.compile_heuristic_try_job) |
| + else: |
| + original_result = ( |
| + original_cl_confidence.test_try_job if approach == 'try_job' else |
| + original_cl_confidence.test_heuristic_try_job) |
| + |
| + if not original_cl_confidence or not original_result: |
|
stgao
2016/10/06 01:57:59
The logic to merge both results and calculate the
chanli
2016/10/07 00:01:13
Done.
|
| + correct_number = new_result[suspected_cl_status.CORRECT] |
| + incorrect_number = new_result[suspected_cl_status.INCORRECT] |
| + total_number = correct_number + incorrect_number |
| + confidence = float(correct_number) / total_number if total_number else 0.0 |
| + |
| + updated_result = { |
| + 'correct': correct_number, |
| + 'total': total_number, |
| + 'confidence': confidence |
| + } |
| + else: |
| + updated_result = copy.deepcopy(original_result) |
| + |
| + updated_result['correct'] += new_result[suspected_cl_status.CORRECT] |
| + updated_result['total'] += (new_result[suspected_cl_status.CORRECT] + |
| + new_result[suspected_cl_status.INCORRECT]) |
| + updated_result['confidence'] = ( |
| + float(updated_result['correct']) / updated_result['total'] if |
| + updated_result['total'] else 0.0 |
| + ) |
| + |
| + return updated_result |
| + |
| + |
| +def _GetMostRecentCLConfidence(): |
| + cl_confidences = CLConfidence.query().order(-CLConfidence.end_date).fetch(1) |
|
stgao
2016/10/06 01:57:59
This query result won't be cached. Using Versioned
chanli
2016/10/07 00:01:12
Done.
|
| + |
| + return cl_confidences[0] if cl_confidences else None |
| + |
| + |
| +@ndb.transactional |
| +def _SavesNewCLConfidence( |
| + _last_cl_confidence, date_start, date_end, |
|
stgao
2016/10/06 01:57:59
Why the underscore prefix on last_cl_confidence?
chanli
2016/10/07 00:01:12
typo...
|
| + result_heuristic, result_try_job, result_both): |
| + |
| + new_cl_confidence = CLConfidence() |
| + new_cl_confidence.start_date = date_start |
| + new_cl_confidence.end_date = date_end |
| + new_cl_confidence.compile_heuristic = _CalculateConfidenceLevelsForHeuristic( |
| + _last_cl_confidence, failure_type.COMPILE, |
| + result_heuristic[failure_type.COMPILE]) |
| + new_cl_confidence.test_heuristic = _CalculateConfidenceLevelsForHeuristic( |
| + _last_cl_confidence, failure_type.TEST, |
| + result_heuristic[failure_type.TEST]) |
| + new_cl_confidence.compile_try_job = _CalculateConfidenceLevelsForTryJob( |
| + _last_cl_confidence, failure_type.COMPILE, 'try_job', |
| + result_try_job[failure_type.COMPILE]) |
| + new_cl_confidence.test_try_job = _CalculateConfidenceLevelsForTryJob( |
| + _last_cl_confidence, failure_type.TEST, 'try_job', |
| + result_try_job[failure_type.TEST]) |
| + new_cl_confidence.compile_heuristic_try_job = ( |
| + _CalculateConfidenceLevelsForTryJob( |
| + _last_cl_confidence, failure_type.COMPILE, 'both', |
| + result_both[failure_type.COMPILE])) |
| + new_cl_confidence.test_heuristic_try_job = ( |
| + _CalculateConfidenceLevelsForTryJob( |
| + _last_cl_confidence, failure_type.TEST, 'both', |
| + result_both[failure_type.TEST])) |
| + new_cl_confidence.put() |
| + return new_cl_confidence |
| + |
| + |
| +def _AddMoreConstrainsToQuery(query, failure_args, date_start, date_end): |
| + if 'c' in failure_args: |
| + query = query.filter( |
| + WfSuspectedCL.status == failure_type.COMPILE) |
| + elif 't' in failure_args: |
| + query = query.filter( |
| + WfSuspectedCL.status == failure_type.TEST) |
| + |
| + if date_start: |
| + query = query.filter( |
| + WfSuspectedCL.updated_time >= date_start) |
| + query = query.filter( |
| + WfSuspectedCL.updated_time < date_end) |
|
stgao
2016/10/06 01:57:59
What if the same suspect got included in one query
chanli
2016/10/07 00:01:12
Done.
|
| + return query |
| + |
| + |
| +def _GetCLDataForHeuristic(failure_args, date_start, date_end): |
| + |
| + suspected_cls_query = WfSuspectedCL.query(remote_api.ndb.AND( |
| + WfSuspectedCL.status.IN(TRIAGED_STATUS), |
| + WfSuspectedCL.approaches == analysis_approach_type.HEURISTIC)) |
| + |
| + suspected_cls_query = _AddMoreConstrainsToQuery( |
| + suspected_cls_query, failure_args, date_start, date_end) |
| + |
| + suspected_cls = suspected_cls_query.fetch() |
| + |
| + cl_by_top_score_dict = defaultdict( |
| + lambda: defaultdict(lambda: defaultdict(int))) |
| + for cl in suspected_cls: |
| + if not cl.builds: |
| + continue |
| + |
| + failures = [] |
| + for build in cl.builds.values(): |
| + if (build['approaches'] == [analysis_approach_type.TRY_JOB] or |
|
stgao
2016/10/06 01:57:59
Explain why we need this check? The filter above a
chanli
2016/10/07 00:01:12
Removed.
|
| + build['failures'] in failures or not build['top_score'] or |
| + build['status'] is None): |
| + continue |
| + |
| + failures.append(build['failures']) |
| + |
| + failure = build['failure_type'] |
| + top_score = build['top_score'] |
| + status = build['status'] |
| + cl_by_top_score_dict[failure][top_score][status] += 1 |
| + |
| + return cl_by_top_score_dict |
| + |
| + |
| +def _GetCLDataForTryJob(failure_args, date_start, date_end): |
| + suspected_cls_query = WfSuspectedCL.query(remote_api.ndb.AND( |
| + WfSuspectedCL.status.IN(TRIAGED_STATUS), |
| + WfSuspectedCL.approaches == analysis_approach_type.TRY_JOB)) |
| + |
| + suspected_cls_query = _AddMoreConstrainsToQuery( |
| + suspected_cls_query, failure_args, date_start, date_end) |
| + |
| + suspected_cls = suspected_cls_query.fetch() |
| + |
| + try_job_cls_dict = defaultdict(lambda: defaultdict(int)) |
| + both_cls_dict = defaultdict(lambda: defaultdict(int)) |
| + for cl in suspected_cls: |
| + if not cl.builds: |
| + continue |
| + |
| + failures = [] |
| + for build in cl.builds.values(): |
| + if (build['approaches'] == [analysis_approach_type.HEURISTIC] or |
|
stgao
2016/10/06 01:57:59
same here.
chanli
2016/10/07 00:01:12
Done.
|
| + build['failures'] in failures): |
| + continue |
| + |
| + failures.append(build['failures']) |
| + |
| + try_job_cls_dict[build['failure_type']][build['status']] += 1 |
| + |
| + if analysis_approach_type.HEURISTIC in build['approaches']: |
| + # Both heuristic and try job found this CL on this build. |
| + both_cls_dict[build['failure_type']][build['status']] += 1 |
| + |
| + return try_job_cls_dict, both_cls_dict |
| + |
| + |
| +def _PrintResult( |
| + date_start, date_end, result_heuristic, result_try_job, result_both): |
| + print 'Start Date: ', date_start |
| + print 'End Date: ', date_end |
| + print '--------------------------------------------------------------------' |
| + if result_heuristic: |
| + print 'compile_heuristic' |
| + print json.dumps(result_heuristic.get(failure_type.COMPILE), indent=2) |
| + print 'test_heuristic' |
| + print json.dumps(result_heuristic.get(failure_type.TEST), indent=2) |
| + if result_try_job: |
| + print 'compile_try_job' |
| + print json.dumps(result_try_job.get(failure_type.COMPILE), indent=2) |
| + print 'test_try_job' |
| + print json.dumps(result_try_job.get(failure_type.TEST), indent=2) |
| + if result_both: |
| + print 'compile_heuristic_try_job' |
| + print json.dumps(result_both.get(failure_type.COMPILE), indent=2) |
| + print 'test_heuristic_try_job' |
| + print json.dumps(result_both.get(failure_type.TEST), indent=2) |
| + |
| + |
| +def _ValidDate(date_str): |
| + try: |
| + return datetime.datetime.strptime(date_str, '%Y-%m-%d') |
| + except ValueError: |
| + raise argparse.ArgumentTypeError('Type of date is invalid.') |
| + |
| + |
| +def _GetArguments(): |
| + parser = argparse.ArgumentParser() |
| + |
| +  # Uses a group to make -c and -t mutually exclusive, because specifying both |
| +  # would mean querying everything. |
| +  # The same applies to -r and -j. |
| + failure_group = parser.add_mutually_exclusive_group() |
| + failure_group.add_argument('-c', action='store_true', |
| + help='get confidence score for compile failures.') |
| + failure_group.add_argument('-t', action='store_true', |
| + help='get confidence score for test failures.') |
| + |
| + approach_group = parser.add_mutually_exclusive_group() |
| + # Uses -r for heuristic failures because -h is already used for help. |
| + approach_group.add_argument('-r', action='store_true', |
| + help='get confidence score for heuristic failures.') |
| + # Uses -j for try job failures because -t is already used for test failures. |
| + approach_group.add_argument('-j', action='store_true', |
| + help='get confidence score for try job failures.') |
| + |
| + parser.add_argument('-s', help='The Start Date - format YYYY-MM-DD', |
| + type=_ValidDate) |
| + parser.add_argument('-e', help='The End Date - format YYYY-MM-DD', |
| + type=_ValidDate) |
| + |
| + args_dict = vars(parser.parse_args()) |
|
stgao
2016/10/06 01:57:59
Hm, can we store the value to some destination var
chanli
2016/10/07 00:01:12
Done.
|
| + useful_args = {} |
| + for arg, value in args_dict.iteritems(): |
| + if value: |
| + useful_args[arg] = value |
| + |
| + return useful_args |
| + |
| + |
| +if __name__ == '__main__': |
| + # Set up the Remote API to use services on the live App Engine. |
| + remote_api.EnableRemoteApi(app_id='findit-for-me') |
| + |
| + args = _GetArguments() |
| + |
| + default_end_date = datetime.datetime.utcnow().replace( |
|
stgao
2016/10/06 01:57:59
use time_util instead?
chanli
2016/10/07 00:01:12
Done.
|
| + hour=0, minute=0, second=0, microsecond=0) |
| + end_date = args.get('e', default_end_date) |
| + |
| + last_cl_confidence = _GetMostRecentCLConfidence() |
| + start_date = args.get('s') |
| + if not start_date: |
| + start_date = last_cl_confidence.end_date if last_cl_confidence else None |
| + |
| + heuristic_result = None |
| + try_job_result = None |
| + both_result = None |
| + if 'r' in args: # Only calculates results for heuristic. |
| + heuristic_result = _GetCLDataForHeuristic(args, start_date, end_date) |
| + elif 'j' in args: # Only calculates results for try job. |
| + try_job_result, both_result = _GetCLDataForTryJob( |
| + args, start_date, end_date) |
| + else: # A full calculation for CLs for both failure types. |
| + heuristic_result = _GetCLDataForHeuristic(args, start_date, end_date) |
| + try_job_result, both_result = _GetCLDataForTryJob( |
| + args, start_date, end_date) |
| + |
| + if not args: # Saves new confidence score for full calculation only. |
| + cl_confidence = _SavesNewCLConfidence( |
| + last_cl_confidence, start_date, end_date, heuristic_result, |
| + try_job_result, both_result) |
| + |
| + heuristic_result = { |
| + failure_type.COMPILE: cl_confidence.compile_heuristic, |
| + failure_type.TEST: cl_confidence.test_heuristic |
| + } |
| + try_job_result = { |
| + failure_type.COMPILE: cl_confidence.compile_try_job, |
| + failure_type.TEST: cl_confidence.test_try_job |
| + } |
| + both_result = { |
| + failure_type.COMPILE: cl_confidence.compile_heuristic_try_job, |
| + failure_type.TEST: cl_confidence.test_heuristic_try_job |
| + } |
| + _PrintResult( |
| + None, end_date, heuristic_result, try_job_result, both_result) |
| + |
| + else: |
| + _PrintResult( |
| + start_date, end_date, heuristic_result, try_job_result, both_result) |