Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(280)

Side by Side Diff: appengine/findit/waterfall/monitor_try_job_pipeline.py

Issue 1948093003: [Findit] Adding more try job error detection (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Rebase Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 from datetime import datetime 5 from datetime import datetime
6 import json 6 import json
7 import time 7 import time
8 8
9 from google.appengine.ext import ndb 9 from google.appengine.ext import ndb
10 10
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
62 if timed_out: 62 if timed_out:
63 return ( 63 return (
64 { 64 {
65 'message': 'Try job monitoring was abandoned.', 65 'message': 'Try job monitoring was abandoned.',
66 'reason': 'Timeout after %s hours' % ( 66 'reason': 'Timeout after %s hours' % (
67 waterfall_config.GetTryJobSettings().get('job_timeout_hours')) 67 waterfall_config.GetTryJobSettings().get('job_timeout_hours'))
68 }, 68 },
69 try_job_error.TIMEOUT) 69 try_job_error.TIMEOUT)
70 70
71 if buildbucket_response: 71 if buildbucket_response:
72 # If there is no explicit timeout or reason specified, check the last 72 # Check buildbucket_response.
73 # build response for errors. 73 buildbucket_failure_reason = buildbucket_response.get('failure_reason')
74 if buildbucket_failure_reason == 'BUILD_FAILURE':
75 # Can occur if an exception is thrown or the disk is full.
76 return (
77 {
78 'message': 'Compile failed unexpectedly.',
chanli 2016/05/06 23:17:46 I think 'BUILD FAILURE' or 'INFRA_FAILURE' are alr
lijeffrey 2016/05/07 00:12:45 This is for consistency, the other errors are more
79 'reason': MonitorTryJobPipeline.UNKNOWN
80 },
81 try_job_error.INFRA_FAILURE
chanli 2016/05/06 23:17:46 If it's because a bad revision which was fixed wit
lijeffrey 2016/05/07 00:12:45 We don't have a way of knowing that it's due to a
82 )
83 elif buildbucket_failure_reason == 'INFRA_FAILURE':
84 return (
85 {
86 'message': ('Try job encountered an infra issue during '
87 'execution.'),
88 'reason': MonitorTryJobPipeline.UNKNOWN
89 },
90 try_job_error.INFRA_FAILURE
91 )
92 elif buildbucket_failure_reason:
93 return (
94 {
95 'message': buildbucket_failure_reason,
96 'reason': MonitorTryJobPipeline.UNKNOWN
97 },
98 try_job_error.UNKNOWN
99 )
100
101 # Check result_details_json for errors.
74 result_details_json = json.loads( 102 result_details_json = json.loads(
75 buildbucket_response.get('result_details_json', '{}')) or {} 103 buildbucket_response.get('result_details_json', '{}')) or {}
76
77 # Check result_details_json for any obvious errors.
78 error = result_details_json.get('error', {}) 104 error = result_details_json.get('error', {})
79 if error: 105 if error:
80 return ( 106 return (
81 { 107 {
82 'message': 'Buildbucket reported an error.', 108 'message': 'Buildbucket reported an error.',
83 'reason': error.get('message', MonitorTryJobPipeline.UNKNOWN) 109 'reason': error.get('message', MonitorTryJobPipeline.UNKNOWN)
84 }, 110 },
85 try_job_error.CI_REPORTED_ERROR) 111 try_job_error.CI_REPORTED_ERROR)
86 112
87 # Check the report to see if anything went wrong. 113 if not result_details_json.get('properties', {}).get('report'):
88 report = result_details_json.get('properties', {}).get('report')
89 if report:
90 if report.get('metadata', {}).get('infra_failure'):
91 # Check for any infra issues caught by the recipe.
92 return (
93 {
94 'message': ('Try job encountered an infra issue during '
95 'execution.'),
96 'reason': MonitorTryJobPipeline.UNKNOWN
97 },
98 try_job_error.INFRA_FAILURE)
99 else:
100 # A report should always be included as part of 'properties'. If it is 114 # A report should always be included as part of 'properties'. If it is
101 # missing something else is wrong. 115 # missing something else is wrong.
102 return ( 116 return (
103 { 117 {
104 'message': 'No result report was found.', 118 'message': 'No result report was found.',
105 'reason': MonitorTryJobPipeline.UNKNOWN 119 'reason': MonitorTryJobPipeline.UNKNOWN
106 }, 120 },
107 try_job_error.UNKNOWN) 121 try_job_error.UNKNOWN
122 )
108 123
109 return None, None 124 return None, None
110 125
111 @staticmethod 126 @staticmethod
112 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build, 127 def _UpdateTryJobMetadata(try_job_data, start_time, buildbucket_build,
113 buildbucket_error, timed_out): 128 buildbucket_error, timed_out):
114 buildbucket_response = {} 129 buildbucket_response = {}
115 130
116 if buildbucket_build: 131 if buildbucket_build:
117 try_job_data.request_time = ( 132 try_job_data.request_time = (
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
234 already_set_started = True 249 already_set_started = True
235 250
236 if time.time() > deadline: # pragma: no cover 251 if time.time() > deadline: # pragma: no cover
237 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True) 252 self._UpdateTryJobMetadata(try_job_data, start_time, build, error, True)
238 # Explicitly abort the whole pipeline. 253 # Explicitly abort the whole pipeline.
239 raise pipeline.Abort( 254 raise pipeline.Abort(
240 'Try job %s timed out after %d hours.' % ( 255 'Try job %s timed out after %d hours.' % (
241 try_job_id, timeout_hours)) 256 try_job_id, timeout_hours))
242 257
243 time.sleep(pipeline_wait_seconds) # pragma: no cover 258 time.sleep(pipeline_wait_seconds) # pragma: no cover
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698