| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from datetime import datetime | 5 from datetime import datetime |
| 6 import json |
| 6 import time | 7 import time |
| 7 | 8 |
| 8 from common.pipeline_wrapper import BasePipeline | 9 from common.pipeline_wrapper import BasePipeline |
| 9 from common.pipeline_wrapper import pipeline | 10 from common.pipeline_wrapper import pipeline |
| 10 from common.waterfall import buildbucket_client | 11 from common.waterfall import buildbucket_client |
| 12 from common.waterfall import try_job_error |
| 11 from common.waterfall.buildbucket_client import BuildbucketBuild | 13 from common.waterfall.buildbucket_client import BuildbucketBuild |
| 12 from model import analysis_status | 14 from model import analysis_status |
| 13 from model.wf_try_job import WfTryJob | 15 from model.wf_try_job import WfTryJob |
| 14 from model.wf_try_job_data import WfTryJobData | 16 from model.wf_try_job_data import WfTryJobData |
| 15 from waterfall import waterfall_config | 17 from waterfall import waterfall_config |
| 16 from waterfall.try_job_type import TryJobType | 18 from waterfall.try_job_type import TryJobType |
| 17 | 19 |
| 18 | 20 |
| 19 class MonitorTryJobPipeline(BasePipeline): | 21 class MonitorTryJobPipeline(BasePipeline): |
| 20 """A pipeline for monitoring a try job and recording results when it's done. | 22 """A pipeline for monitoring a try job and recording results when it's done. |
| 21 | 23 |
| 22 The result will be stored to compile_results or test_results according to | 24 The result will be stored to compile_results or test_results according to |
| 23 which type of build failure we are running try job for. | 25 which type of build failure we are running try job for. |
| 24 """ | 26 """ |
| 25 | 27 |
| 26 TIMEOUT = 'TIMEOUT' | 28 UNKNOWN = 'UNKNOWN' |
| 27 | 29 |
| 28 @staticmethod | 30 @staticmethod |
| 29 def _MicrosecondsToDatetime(microseconds): | 31 def _MicrosecondsToDatetime(microseconds): |
| 30 """Returns a datetime given the number of microseconds, or None.""" | 32 """Returns a datetime given the number of microseconds, or None.""" |
| 31 if microseconds: | 33 if microseconds: |
| 32 return datetime.utcfromtimestamp(float(microseconds) / 1000000) | 34 return datetime.utcfromtimestamp(float(microseconds) / 1000000) |
| 33 return None | 35 return None |
| 34 | 36 |
| 35 @staticmethod | 37 @staticmethod |
| 36 def _GetError(buildbucket_error, timed_out): | 38 def _GetError(buildbucket_response, buildbucket_error, timed_out): |
| 37 # TODO(lijeffrey): Currently only timeouts (Findit abandoned monitoring the | 39 """Determines whether or not a try job error occurred. |
| 38 # try job after waiting too long for it to complete) and errors reported | 40 |
| 39 # directly in the buildbucket_client request are captured. Several other | 41 Args: |
| 40 # failures can be derived from the response in the build too which should | 42 buildbucket_response: A dict of the json response from buildbucket. |
| 41 # be determined here. | 43 buildbucket_error: A BuildBucketError object returned from the call to |
| 44 buildbucket_client.GetTryJobs() |
| 45 timed_out: A bool whether or not Findit abandoned monitoring the try job. |
| 46 |
| 47 Returns: |
| 48 A tuple containing an error dict and number representing an error code, or |
| 49 (None, None) if no error was determined to have occurred. |
| 50 """ |
| 51 |
| 42 if buildbucket_error: | 52 if buildbucket_error: |
| 43 return { | 53 return ( |
| 44 'message': buildbucket_error.message, | 54 { |
| 45 'reason': buildbucket_error.reason | 55 'message': buildbucket_error.message, |
| 46 } | 56 'reason': buildbucket_error.reason |
| 57 }, |
| 58 try_job_error.BUILDBUCKET_REQUEST_ERROR) |
| 47 | 59 |
| 48 if timed_out: | 60 if timed_out: |
| 49 return { | 61 return ( |
| 50 'message': 'Try job monitoring was abandoned.', | 62 { |
| 51 'reason': MonitorTryJobPipeline.TIMEOUT | 63 'message': 'Try job monitoring was abandoned.', |
| 52 } | 64 'reason': 'Timeout after %s hours' % ( |
| 65 waterfall_config.GetTryJobSettings().get('job_timeout_hours')) |
| 66 }, |
| 67 try_job_error.TIMEOUT) |
| 68 |
| 69 if buildbucket_response: |
| 70 # If there is no explicit timeout or reason specified, check the last |
| 71 # build response for errors. |
| 72 result_details_json = json.loads( |
| 73 buildbucket_response.get('result_details_json', '{}')) or {} |
| 74 |
| 75 # Check result_details_json for any obvious errors. |
| 76 error = result_details_json.get('error', {}) |
| 77 if error: |
| 78 return ( |
| 79 { |
| 80 'message': 'Buildbucket reported an error.', |
| 81 'reason': error.get('message', MonitorTryJobPipeline.UNKNOWN) |
| 82 }, |
| 83 try_job_error.CI_REPORTED_ERROR) |
| 84 |
| 85 # Check the report to see if anything went wrong. |
| 86 report = result_details_json.get('properties', {}).get('report') |
| 87 if report: |
| 88 if report.get('metadata', {}).get('infra_failure'): |
| 89 # Check for any infra issues caught by the recipe. |
| 90 return ( |
| 91 { |
| 92 'message': ('Try job encountered an infra issue during ' |
| 93 'execution.'), |
| 94 'reason': MonitorTryJobPipeline.UNKNOWN |
| 95 }, |
| 96 try_job_error.INFRA_FAILURE) |
| 97 else: |
| 98 # A report should always be included as part of 'properties'. If it is |
| 99 # missing, something else is wrong. |
| 100 return ( |
| 101 { |
| 102 'message': 'No result report was found.', |
| 103 'reason': MonitorTryJobPipeline.UNKNOWN |
| 104 }, |
| 105 try_job_error.UNKNOWN) |
| 106 |
| 107 return None, None |
| 53 | 108 |
| 54 @staticmethod | 109 @staticmethod |
| 55 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, | 110 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, |
| 56 buildbucket_error, timed_out): | 111 buildbucket_error, timed_out): |
| 112 buildbucket_response = {} |
| 57 if buildbucket_build: | 113 if buildbucket_build: |
| 58 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime( | 114 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime( |
| 59 buildbucket_build.request_time) | 115 buildbucket_build.request_time) |
| 60 # If start_time is unavailable, fall back to request_time. | 116 # If start_time is unavailable, fall back to request_time. |
| 61 try_job_data.start_time = start_time or try_job_data.request_time | 117 try_job_data.start_time = start_time or try_job_data.request_time |
| 62 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime( | 118 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime( |
| 63 buildbucket_build.end_time) | 119 buildbucket_build.end_time) |
| 64 try_job_data.number_of_commits_analyzed = len( | 120 try_job_data.number_of_commits_analyzed = len( |
| 65 buildbucket_build.report.get('result', {})) | 121 buildbucket_build.report.get('result', {})) |
| 66 try_job_data.try_job_url = buildbucket_build.url | 122 try_job_data.try_job_url = buildbucket_build.url |
| 67 try_job_data.regression_range_size = buildbucket_build.report.get( | 123 try_job_data.regression_range_size = buildbucket_build.report.get( |
| 68 'metadata', {}).get('regression_range_size') | 124 'metadata', {}).get('regression_range_size') |
| 69 try_job_data.last_buildbucket_response = buildbucket_build.response | 125 try_job_data.last_buildbucket_response = buildbucket_build.response |
| 126 buildbucket_response = buildbucket_build.response |
| 70 | 127 |
| 71 error = MonitorTryJobPipeline._GetError(buildbucket_error, timed_out) | 128 error_dict, error_code = MonitorTryJobPipeline._GetError( |
| 129 buildbucket_response, buildbucket_error, timed_out) |
| 72 | 130 |
| 73 if error: | 131 if error_dict: |
| 74 try_job_data.error = error | 132 try_job_data.error = error_dict |
| 133 try_job_data.error_code = error_code |
| 75 | 134 |
| 76 try_job_data.put() | 135 try_job_data.put() |
| 77 | 136 |
| 78 def _UpdateTryJobResult( | 137 def _UpdateTryJobResult( |
| 79 self, status, master_name, builder_name, build_number, try_job_type, | 138 self, status, master_name, builder_name, build_number, try_job_type, |
| 80 try_job_id, try_job_url, result_content=None): | 139 try_job_id, try_job_url, result_content=None): |
| 81 """Updates try job result based on the returned try job status and result.""" | 140 """Updates try job result based on the returned try job status and result.""" |
| 82 result = { | 141 result = { |
| 83 'report': result_content, | 142 'report': result_content, |
| 84 'url': try_job_url, | 143 'url': try_job_url, |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 166 already_set_started = True | 225 already_set_started = True |
| 167 | 226 |
| 168 if time.time() > deadline: # pragma: no cover | 227 if time.time() > deadline: # pragma: no cover |
| 169 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True) | 228 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True) |
| 170 # Explicitly abort the whole pipeline. | 229 # Explicitly abort the whole pipeline. |
| 171 raise pipeline.Abort( | 230 raise pipeline.Abort( |
| 172 'Try job %s timed out after %d hours.' % ( | 231 'Try job %s timed out after %d hours.' % ( |
| 173 try_job_id, timeout_hours)) | 232 try_job_id, timeout_hours)) |
| 174 | 233 |
| 175 time.sleep(pipeline_wait_seconds) # pragma: no cover | 234 time.sleep(pipeline_wait_seconds) # pragma: no cover |
| OLD | NEW |