Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from datetime import datetime | 5 from datetime import datetime |
| 6 import json | |
| 7 import re | |
| 6 import time | 8 import time |
| 7 | 9 |
| 8 from common.pipeline_wrapper import BasePipeline | 10 from common.pipeline_wrapper import BasePipeline |
| 9 from common.pipeline_wrapper import pipeline | 11 from common.pipeline_wrapper import pipeline |
| 10 from common.waterfall import buildbucket_client | 12 from common.waterfall import buildbucket_client |
| 11 from common.waterfall.buildbucket_client import BuildbucketBuild | 13 from common.waterfall.buildbucket_client import BuildbucketBuild |
| 12 from model import analysis_status | 14 from model import analysis_status |
| 15 from model import try_job_error | |
| 13 from model.wf_try_job import WfTryJob | 16 from model.wf_try_job import WfTryJob |
| 14 from model.wf_try_job_data import WfTryJobData | 17 from model.wf_try_job_data import WfTryJobData |
| 15 from waterfall import waterfall_config | 18 from waterfall import waterfall_config |
| 16 from waterfall.try_job_type import TryJobType | 19 from waterfall.try_job_type import TryJobType |
| 17 | 20 |
| 18 | 21 |
| 19 class MonitorTryJobPipeline(BasePipeline): | 22 class MonitorTryJobPipeline(BasePipeline): |
| 20 """A pipeline for monitoring a try job and recording results when it's done. | 23 """A pipeline for monitoring a try job and recording results when it's done. |
| 21 | 24 |
| 22 The result will be stored to compile_results or test_results according to | 25 The result will be stored to compile_results or test_results according to |
| 23 which type of build failure we are running try job for. | 26 which type of build failure we are running try job for. |
| 24 """ | 27 """ |
| 25 | 28 |
| 26 TIMEOUT = 'TIMEOUT' | 29 UNKNOWN = 'UNKNOWN' |
| 27 | 30 |
| 28 @staticmethod | 31 @staticmethod |
| 29 def _MicrosecondsToDatetime(microseconds): | 32 def _MicrosecondsToDatetime(microseconds): |
| 30 """Returns a datetime given the number of microseconds, or None.""" | 33 """Returns a datetime given the number of microseconds, or None.""" |
| 31 if microseconds: | 34 if microseconds: |
| 32 return datetime.utcfromtimestamp(float(microseconds) / 1000000) | 35 return datetime.utcfromtimestamp(float(microseconds) / 1000000) |
| 33 return None | 36 return None |
| 34 | 37 |
| 35 @staticmethod | 38 @staticmethod |
| 36 def _GetError(buildbucket_error, timed_out): | 39 def _GetError(buildbucket_response, buildbucket_error, timed_out): |
|
chanli
2016/04/25 20:51:28
Will this handle cases like: https://build.chromiu
lijeffrey
2016/04/25 22:54:16
This try job was triggered outside of Findit, but
| |
| 37 # TODO(lijeffrey): Currently only timeouts (Findit abandoned monitoring the | 40 """Determines whether or not a try job error occurred. |
| 38 # try job after waiting too long for it to complete) and errors reported | 41 |
| 39 # directly in the buildbucket_client request are captured. Several other | 42 Args: |
| 40 # failures can be derrived from the response in the build too which should | 43 buildbucket_response: A dict of the json response from buildbucket. |
| 41 # be determined here. | 44 buildbucket_error: A BuildBucketError object returned from the call to |
| 45 buildbucket_client.GetTryJobs() | |
| 46 timed_out: A bool whether or not Findit abandoned monitoring the try job. | |
| 47 | |
| 48 Returns: | |
| 49 A tuple containing an error dict and number representing an error code, or | |
| 50 (None, None) if no error was determined to have occurred. | |
| 51 """ | |
| 52 error_dict = None | |
| 53 error_code = None | |
| 54 | |
| 42 if buildbucket_error: | 55 if buildbucket_error: |
| 43 return { | 56 error_dict = { |
| 44 'message': buildbucket_error.message, | 57 'message': buildbucket_error.message, |
| 45 'reason': buildbucket_error.reason | 58 'reason': buildbucket_error.reason |
| 46 } | 59 } |
| 60 error_code = try_job_error.BUILDBUCKET_ERROR | |
| 61 elif timed_out: | |
| 62 error_dict = { | |
| 63 'message': 'Try job monitoring was abandoned.', | |
| 64 'reason': 'Timeout after %s hours' % ( | |
| 65 waterfall_config.GetTryJobSettings().get('job_timeout_hours')) | |
| 66 } | |
| 67 error_code = try_job_error.TIMEOUT | |
| 68 elif buildbucket_response: | |
| 69 # If there is no explicit timeout or reason specified, check the last | |
| 70 # build response for errors. | |
| 71 result_details_json = json.loads( | |
| 72 buildbucket_response.get('result_details_json', '{}')) or {} | |
| 47 | 73 |
| 48 if timed_out: | 74 # Check result_details_json for any obvious errors. |
| 49 return { | 75 error = result_details_json.get('error', {}) |
| 50 'message': 'Try job monitoring was abandoned.', | 76 if error: |
| 51 'reason': MonitorTryJobPipeline.TIMEOUT | 77 message = error.get('message') |
| 52 } | 78 if message: |
| 79 error_dict = { | |
| 80 'message': 'Try job could not be triggered.', | |
| 81 'reason': message | |
| 82 } | |
|
chanli
2016/04/25 20:51:28
Will it be better if
error_dict = {
'message'
lijeffrey
2016/04/25 22:54:16
Reason should be the root cause, not the observed
| |
| 83 trybot_not_found_pattern = re.compile(r'Builder [^\s-]+ not found') | |
| 84 if trybot_not_found_pattern.match(message): | |
| 85 error_code = try_job_error.TRYBOT_NOT_FOUND | |
| 86 else: | |
| 87 error_code = try_job_error.UNKNOWN | |
| 88 else: | |
| 89 error_dict = { | |
| 90 'message': 'Try job error was detected.', | |
| 91 'reason': MonitorTryJobPipeline.UNKNOWN | |
| 92 } | |
| 93 error_code = try_job_error.UNKNOWN | |
| 94 | |
| 95 # Check the report to see if anything went wrong. | |
| 96 report = result_details_json.get('report') | |
| 97 if report: | |
| 98 if ('infra_failed' in report.get('result', {}).itervalues() or | |
| 99 report.get('metadata', {}).get('infra_failure')): | |
| 100 # Check for any infra issues caught by the recipe. | |
| 101 error_dict = { | |
| 102 'message': 'Try job encountered an infra issue during execution.', | |
| 103 'reason': MonitorTryJobPipeline.UNKNOWN | |
| 104 } | |
| 105 error_code = try_job_error.INFRA_FAILURE | |
| 106 | |
| 107 return error_dict, error_code | |
| 53 | 108 |
| 54 @staticmethod | 109 @staticmethod |
| 55 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, | 110 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, |
| 56 buildbucket_error, timed_out): | 111 buildbucket_error, timed_out): |
| 112 buildbucket_response = {} | |
| 57 if buildbucket_build: | 113 if buildbucket_build: |
| 58 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime( | 114 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime( |
| 59 buildbucket_build.request_time) | 115 buildbucket_build.request_time) |
| 60 # If start_time is unavailable, fallback to request_time. | 116 # If start_time is unavailable, fallback to request_time. |
| 61 try_job_data.start_time = start_time or try_job_data.request_time | 117 try_job_data.start_time = start_time or try_job_data.request_time |
| 62 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime( | 118 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime( |
| 63 buildbucket_build.end_time) | 119 buildbucket_build.end_time) |
| 64 try_job_data.number_of_commits_analyzed = len( | 120 try_job_data.number_of_commits_analyzed = len( |
| 65 buildbucket_build.report.get('result', {})) | 121 buildbucket_build.report.get('result', {})) |
| 66 try_job_data.try_job_url = buildbucket_build.url | 122 try_job_data.try_job_url = buildbucket_build.url |
| 67 try_job_data.regression_range_size = buildbucket_build.report.get( | 123 try_job_data.regression_range_size = buildbucket_build.report.get( |
| 68 'metadata', {}).get('regression_range_size') | 124 'metadata', {}).get('regression_range_size') |
| 69 try_job_data.last_buildbucket_response = buildbucket_build.response | 125 try_job_data.last_buildbucket_response = buildbucket_build.response |
| 126 buildbucket_response = buildbucket_build.response | |
| 70 | 127 |
| 71 error = MonitorTryJobPipeline._GetError(buildbucket_error, timed_out) | 128 error_dict, error_code = MonitorTryJobPipeline._GetError( |
| 129 buildbucket_response, buildbucket_error, timed_out) | |
| 72 | 130 |
| 73 if error: | 131 if error_dict: |
| 74 try_job_data.error = error | 132 try_job_data.error = error_dict |
| 133 try_job_data.error_code = error_code | |
| 75 | 134 |
| 76 try_job_data.put() | 135 try_job_data.put() |
| 77 | 136 |
| 78 def _UpdateTryJobResult( | 137 def _UpdateTryJobResult( |
| 79 self, status, master_name, builder_name, build_number, try_job_type, | 138 self, status, master_name, builder_name, build_number, try_job_type, |
| 80 try_job_id, try_job_url, result_content=None): | 139 try_job_id, try_job_url, result_content=None): |
| 81 """Updates try job result based on responsed try job status and result.""" | 140 """Updates try job result based on responsed try job status and result.""" |
| 82 result = { | 141 result = { |
| 83 'report': result_content, | 142 'report': result_content, |
| 84 'url': try_job_url, | 143 'url': try_job_url, |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 166 already_set_started = True | 225 already_set_started = True |
| 167 | 226 |
| 168 if time.time() > deadline: # pragma: no cover | 227 if time.time() > deadline: # pragma: no cover |
| 169 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True) | 228 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True) |
| 170 # Explicitly abort the whole pipeline. | 229 # Explicitly abort the whole pipeline. |
| 171 raise pipeline.Abort( | 230 raise pipeline.Abort( |
| 172 'Try job %s timed out after %d hours.' % ( | 231 'Try job %s timed out after %d hours.' % ( |
| 173 try_job_id, timeout_hours)) | 232 try_job_id, timeout_hours)) |
| 174 | 233 |
| 175 time.sleep(pipeline_wait_seconds) # pragma: no cover | 234 time.sleep(pipeline_wait_seconds) # pragma: no cover |
| OLD | NEW |