Chromium Code Reviews| Index: appengine/findit/waterfall/monitor_try_job_pipeline.py |
| diff --git a/appengine/findit/waterfall/monitor_try_job_pipeline.py b/appengine/findit/waterfall/monitor_try_job_pipeline.py |
| index ffda4499036eea99a5335b5d0e8e7a6d3a8f526c..6d36303dd32d6506f979cc755ea50e3d7d5300a9 100644 |
| --- a/appengine/findit/waterfall/monitor_try_job_pipeline.py |
| +++ b/appengine/findit/waterfall/monitor_try_job_pipeline.py |
| @@ -3,6 +3,8 @@ |
| # found in the LICENSE file. |
| from datetime import datetime |
| +import json |
| +import re |
| import time |
| from common.pipeline_wrapper import BasePipeline |
| @@ -10,6 +12,7 @@ from common.pipeline_wrapper import pipeline |
| from common.waterfall import buildbucket_client |
| from common.waterfall.buildbucket_client import BuildbucketBuild |
| from model import analysis_status |
| +from model import try_job_error |
| from model.wf_try_job import WfTryJob |
| from model.wf_try_job_data import WfTryJobData |
| from waterfall import waterfall_config |
| @@ -23,7 +26,7 @@ class MonitorTryJobPipeline(BasePipeline): |
| which type of build failure we are running try job for. |
| """ |
| - TIMEOUT = 'TIMEOUT' |
| + UNKNOWN = 'UNKNOWN' |
| @staticmethod |
| def _MicrosecondsToDatetime(microseconds): |
| @@ -33,27 +36,80 @@ class MonitorTryJobPipeline(BasePipeline): |
| return None |
| @staticmethod |
| - def _GetError(buildbucket_error, timed_out): |
| - # TODO(lijeffrey): Currently only timeouts (Findit abandoned monitoring the |
| - # try job after waiting too long for it to complete) and errors reported |
| - # directly in the buildbucket_client request are captured. Several other |
| - # failures can be derrived from the response in the build too which should |
| - # be determined here. |
| + def _GetError(buildbucket_response, buildbucket_error, timed_out): |
|
chanli
2016/04/25 20:51:28
Will this handle cases like: https://build.chromiu
lijeffrey
2016/04/25 22:54:16
This try job was triggered outside of Findit, but
|
| + """Determines whether or not a try job error occurred. |
| + |
| + Args: |
| + buildbucket_response: A dict of the json response from buildbucket. |
| + buildbucket_error: A BuildBucketError object returned from the call to |
| + buildbucket_client.GetTryJobs() |
| + timed_out: A bool indicating whether Findit abandoned monitoring the try job. |
| + |
| + Returns: |
| + A tuple of an error dict and a number representing an error code, or |
| + (None, None) if no error was determined to have occurred. |
| + """ |
| + error_dict = None |
| + error_code = None |
| + |
| if buildbucket_error: |
| - return { |
| + error_dict = { |
| 'message': buildbucket_error.message, |
| 'reason': buildbucket_error.reason |
| } |
| - |
| - if timed_out: |
| - return { |
| + error_code = try_job_error.BUILDBUCKET_ERROR |
| + elif timed_out: |
| + error_dict = { |
| 'message': 'Try job monitoring was abandoned.', |
| - 'reason': MonitorTryJobPipeline.TIMEOUT |
| + 'reason': 'Timeout after %s hours' % ( |
| + waterfall_config.GetTryJobSettings().get('job_timeout_hours')) |
| } |
| + error_code = try_job_error.TIMEOUT |
| + elif buildbucket_response: |
| + # If there is no explicit timeout or reason specified, check the last |
| + # build response for errors. |
| + result_details_json = json.loads( |
| + buildbucket_response.get('result_details_json', '{}')) or {} |
| + |
| + # Check result_details_json for any obvious errors. |
| + error = result_details_json.get('error', {}) |
| + if error: |
| + message = error.get('message') |
| + if message: |
| + error_dict = { |
| + 'message': 'Try job could not be triggered.', |
| + 'reason': message |
| + } |
|
chanli
2016/04/25 20:51:28
Will it be better if
error_dict = {
'message'
lijeffrey
2016/04/25 22:54:16
Reason should be the root cause, not the observed
|
| + trybot_not_found_pattern = re.compile(r'Builder [^\s-]+ not found') |
| + if trybot_not_found_pattern.match(message): |
| + error_code = try_job_error.TRYBOT_NOT_FOUND |
| + else: |
| + error_code = try_job_error.UNKNOWN |
| + else: |
| + error_dict = { |
| + 'message': 'Try job error was detected.', |
| + 'reason': MonitorTryJobPipeline.UNKNOWN |
| + } |
| + error_code = try_job_error.UNKNOWN |
| + |
| + # Check the report to see if anything went wrong. |
| + report = result_details_json.get('report') |
| + if report: |
| + if ('infra_failed' in report.get('result', {}).itervalues() or |
| + report.get('metadata', {}).get('infra_failure')): |
| + # Check for any infra issues caught by the recipe. |
| + error_dict = { |
| + 'message': 'Try job encountered an infra issue during execution.', |
| + 'reason': MonitorTryJobPipeline.UNKNOWN |
| + } |
| + error_code = try_job_error.INFRA_FAILURE |
| + |
| + return error_dict, error_code |
| @staticmethod |
| def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, |
| buildbucket_error, timed_out): |
| + buildbucket_response = {} |
| if buildbucket_build: |
| try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime( |
| buildbucket_build.request_time) |
| @@ -67,11 +123,14 @@ class MonitorTryJobPipeline(BasePipeline): |
| try_job_data.regression_range_size = buildbucket_build.report.get( |
| 'metadata', {}).get('regression_range_size') |
| try_job_data.last_buildbucket_response = buildbucket_build.response |
| + buildbucket_response = buildbucket_build.response |
| - error = MonitorTryJobPipeline._GetError(buildbucket_error, timed_out) |
| + error_dict, error_code = MonitorTryJobPipeline._GetError( |
| + buildbucket_response, buildbucket_error, timed_out) |
| - if error: |
| - try_job_data.error = error |
| + if error_dict: |
| + try_job_data.error = error_dict |
| + try_job_data.error_code = error_code |
| try_job_data.put() |