Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from datetime import datetime | 5 from datetime import datetime |
| 6 import json | |
| 7 import re | |
| 6 import time | 8 import time |
| 7 | 9 |
| 8 from common.pipeline_wrapper import BasePipeline | 10 from common.pipeline_wrapper import BasePipeline |
| 9 from common.pipeline_wrapper import pipeline | 11 from common.pipeline_wrapper import pipeline |
| 10 from common.waterfall import buildbucket_client | 12 from common.waterfall import buildbucket_client |
| 11 from common.waterfall.buildbucket_client import BuildbucketBuild | 13 from common.waterfall.buildbucket_client import BuildbucketBuild |
| 12 from model import analysis_status | 14 from model import analysis_status |
| 15 from model import try_job_error | |
| 13 from model.wf_try_job import WfTryJob | 16 from model.wf_try_job import WfTryJob |
| 14 from model.wf_try_job_data import WfTryJobData | 17 from model.wf_try_job_data import WfTryJobData |
| 15 from waterfall import waterfall_config | 18 from waterfall import waterfall_config |
| 16 from waterfall.try_job_type import TryJobType | 19 from waterfall.try_job_type import TryJobType |
| 17 | 20 |
| 18 | 21 |
| 19 class MonitorTryJobPipeline(BasePipeline): | 22 class MonitorTryJobPipeline(BasePipeline): |
| 20 """A pipeline for monitoring a try job and recording results when it's done. | 23 """A pipeline for monitoring a try job and recording results when it's done. |
| 21 | 24 |
| 22 The result will be stored to compile_results or test_results according to | 25 The result will be stored to compile_results or test_results according to |
| 23 which type of build failure we are running try job for. | 26 which type of build failure we are running try job for. |
| 24 """ | 27 """ |
| 25 | 28 |
| 26 TIMEOUT = 'TIMEOUT' | 29 UNKNOWN = 'UNKNOWN' |
| 27 | 30 |
| 28 @staticmethod | 31 @staticmethod |
| 29 def _MicrosecondsToDatetime(microseconds): | 32 def _MicrosecondsToDatetime(microseconds): |
| 30 """Returns a datetime given the number of microseconds, or None.""" | 33 """Returns a datetime given the number of microseconds, or None.""" |
| 31 if microseconds: | 34 if microseconds: |
| 32 return datetime.utcfromtimestamp(float(microseconds) / 1000000) | 35 return datetime.utcfromtimestamp(float(microseconds) / 1000000) |
| 33 return None | 36 return None |
| 34 | 37 |
| 35 @staticmethod | 38 @staticmethod |
| 36 def _GetError(buildbucket_error, timed_out): | 39 def _GetError(buildbucket_response, buildbucket_error, timed_out): |
| 37 # TODO(lijeffrey): Currently only timeouts (Findit abandoned monitoring the | 40 """Determines whether or not a try job error occurred. |
| 38 # try job after waiting too long for it to complete) and errors reported | 41 |
| 39 # directly in the buildbucket_client request are captured. Several other | 42 Args: |
| 40 # failures can be derrived from the response in the build too which should | 43 buildbucket_response: A dict of the json response from buildbucket. |
| 41 # be determined here. | 44 buildbucket_error: A BuildBucketError object returned from the call to |
| 45 buildbucket_client.GetTryJobs() | |
| 46 timed_out: A bool whether or not Findit abandoned monitoring the try job. | |
| 47 | |
| 48 Returns: | |
| 49 A tuple containing an error dict and number representing an error code, or | |
| 50 (None, None) if no error was determined to have occurred. | |
| 51 """ | |
| 52 | |
| 42 if buildbucket_error: | 53 if buildbucket_error: |
| 43 return { | 54 return ( |
| 44 'message': buildbucket_error.message, | 55 { |
| 45 'reason': buildbucket_error.reason | 56 'message': buildbucket_error.message, |
| 46 } | 57 'reason': buildbucket_error.reason |
| 58 }, | |
| 59 try_job_error.BUILDBUCKET_ERROR) | |
| 47 | 60 |
| 48 if timed_out: | 61 if timed_out: |
| 49 return { | 62 return ( |
| 50 'message': 'Try job monitoring was abandoned.', | 63 { |
| 51 'reason': MonitorTryJobPipeline.TIMEOUT | 64 'message': 'Try job monitoring was abandoned.', |
| 52 } | 65 'reason': 'Timeout after %s hours' % ( |
| 66 waterfall_config.GetTryJobSettings().get('job_timeout_hours')) | |
| 67 }, | |
| 68 try_job_error.TIMEOUT) | |
| 69 | |
| 70 if buildbucket_response: | |
| 71 # If there is no explicit timeout or reason specified, check the last | |
| 72 # build response for errors. | |
| 73 result_details_json = json.loads( | |
| 74 buildbucket_response.get('result_details_json', '{}')) or {} | |
| 75 | |
| 76 # Check result_details_json for any obvious errors. | |
| 77 error = result_details_json.get('error', {}) | |
| 78 if error: | |
| 79 root_cause = error.get('message') | |
| 80 if root_cause: | |
| 81 error_dict = { | |
| 82 'message': 'Try job could not be triggered.', | |
| 83 'reason': root_cause | |
| 84 } | |
| 85 trybot_not_found_pattern = re.compile(r'Builder [^\s-]+ not found') | |
| 86 | |
| 87 if trybot_not_found_pattern.match(root_cause): | |
| 88 error_code = try_job_error.TRYBOT_NOT_FOUND | |
|
stgao
2016/04/25 23:44:39
Why we want to handle this case specially?
lijeffrey
2016/04/26 00:50:08
This case should be rare and never occur once it's
| |
| 89 else: | |
| 90 error_code = try_job_error.UNKNOWN | |
| 91 | |
| 92 return error_dict, error_code | |
| 93 | |
| 94 return ( | |
| 95 { | |
| 96 'message': 'Try job error was detected.', | |
| 97 'reason': MonitorTryJobPipeline.UNKNOWN | |
| 98 }, | |
| 99 try_job_error.UNKNOWN) | |
| 100 | |
| 101 # Check the report to see if anything went wrong. | |
| 102 report = result_details_json.get('properties', {}).get('report') | |
| 103 if report: | |
| 104 if ('infra_failed' in report.get('result', {}).itervalues() or | |
| 105 report.get('metadata', {}).get('infra_failure')): | |
|
stgao
2016/04/25 23:44:39
It is not added to findit/chromium/test.py yet.
stgao
2016/04/25 23:44:39
Why we need to check both?
lijeffrey
2016/04/26 00:50:08
Done. Good point, I think just metadata should be
| |
| 106 # Check for any infra issues caught by the recipe. | |
| 107 return ( | |
| 108 { | |
| 109 'message': ('Try job encountered an infra issue during ' | |
| 110 'execution.'), | |
| 111 'reason': MonitorTryJobPipeline.UNKNOWN | |
| 112 }, | |
| 113 try_job_error.INFRA_FAILURE) | |
| 114 else: | |
| 115 # A report should always be included as part of the properties. If it is | |
| 116 # missing something is wrong. | |
| 117 return ( | |
| 118 { | |
| 119 'message': 'No result report was found.', | |
| 120 'reason': MonitorTryJobPipeline.UNKNOWN | |
| 121 }, | |
| 122 try_job_error.UNKNOWN) | |
| 123 | |
| 124 return None, None | |
| 53 | 125 |
| 54 @staticmethod | 126 @staticmethod |
| 55 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, | 127 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, |
| 56 buildbucket_error, timed_out): | 128 buildbucket_error, timed_out): |
| 129 buildbucket_response = {} | |
| 57 if buildbucket_build: | 130 if buildbucket_build: |
| 58 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime( | 131 try_job_data.request_time = MonitorTryJobPipeline._MicrosecondsToDatetime( |
| 59 buildbucket_build.request_time) | 132 buildbucket_build.request_time) |
| 60 # If start_time is unavailable, fallback to request_time. | 133 # If start_time is unavailable, fallback to request_time. |
| 61 try_job_data.start_time = start_time or try_job_data.request_time | 134 try_job_data.start_time = start_time or try_job_data.request_time |
| 62 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime( | 135 try_job_data.end_time = MonitorTryJobPipeline._MicrosecondsToDatetime( |
| 63 buildbucket_build.end_time) | 136 buildbucket_build.end_time) |
| 64 try_job_data.number_of_commits_analyzed = len( | 137 try_job_data.number_of_commits_analyzed = len( |
| 65 buildbucket_build.report.get('result', {})) | 138 buildbucket_build.report.get('result', {})) |
| 66 try_job_data.try_job_url = buildbucket_build.url | 139 try_job_data.try_job_url = buildbucket_build.url |
| 67 try_job_data.regression_range_size = buildbucket_build.report.get( | 140 try_job_data.regression_range_size = buildbucket_build.report.get( |
| 68 'metadata', {}).get('regression_range_size') | 141 'metadata', {}).get('regression_range_size') |
| 69 try_job_data.last_buildbucket_response = buildbucket_build.response | 142 try_job_data.last_buildbucket_response = buildbucket_build.response |
| 143 buildbucket_response = buildbucket_build.response | |
| 70 | 144 |
| 71 error = MonitorTryJobPipeline._GetError(buildbucket_error, timed_out) | 145 error_dict, error_code = MonitorTryJobPipeline._GetError( |
| 146 buildbucket_response, buildbucket_error, timed_out) | |
| 72 | 147 |
| 73 if error: | 148 if error_dict: |
| 74 try_job_data.error = error | 149 try_job_data.error = error_dict |
| 150 try_job_data.error_code = error_code | |
| 75 | 151 |
| 76 try_job_data.put() | 152 try_job_data.put() |
| 77 | 153 |
| 78 def _UpdateTryJobResult( | 154 def _UpdateTryJobResult( |
| 79 self, status, master_name, builder_name, build_number, try_job_type, | 155 self, status, master_name, builder_name, build_number, try_job_type, |
| 80 try_job_id, try_job_url, result_content=None): | 156 try_job_id, try_job_url, result_content=None): |
| 81 """Updates try job result based on responsed try job status and result.""" | 157 """Updates try job result based on responsed try job status and result.""" |
| 82 result = { | 158 result = { |
| 83 'report': result_content, | 159 'report': result_content, |
| 84 'url': try_job_url, | 160 'url': try_job_url, |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 166 already_set_started = True | 242 already_set_started = True |
| 167 | 243 |
| 168 if time.time() > deadline: # pragma: no cover | 244 if time.time() > deadline: # pragma: no cover |
| 169 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True) | 245 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True) |
| 170 # Explicitly abort the whole pipeline. | 246 # Explicitly abort the whole pipeline. |
| 171 raise pipeline.Abort( | 247 raise pipeline.Abort( |
| 172 'Try job %s timed out after %d hours.' % ( | 248 'Try job %s timed out after %d hours.' % ( |
| 173 try_job_id, timeout_hours)) | 249 try_job_id, timeout_hours)) |
| 174 | 250 |
| 175 time.sleep(pipeline_wait_seconds) # pragma: no cover | 251 time.sleep(pipeline_wait_seconds) # pragma: no cover |
| OLD | NEW |