Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1006)

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py

Issue 1339613005: Refactoring scripts that wait for buildbot jobs to complete. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@hax
Patch Set: . Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 #
3 # Copyright 2015 The Chromium Authors. All rights reserved. 2 # Copyright 2015 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file. 4 # found in the LICENSE file.
6 5
7 """Waits for any one job out of a list to complete or a default timeout.""" 6 """Waits for any one job out of a list to complete or a default timeout."""
8 7
9 import json 8 import json
9 import os
10 import subprocess 10 import subprocess
11 import sys 11 import sys
12 import time 12 import time
13 import urllib2
14 13
15 import check_buildbot 14 import check_buildbot
16 15
16 # Return codes. Note that at this time COMPLETED == FAILED.
17 COMPLETED, FAILED, TIMED_OUT, NOT_EVEN_STARTED = 0, 0, 2, 3
prasadv 2015/09/17 22:33:35 COMPLETED == FAILED, shouldn't we mark the failed
RobertoCN 2015/09/19 00:32:54 Completed now != failed. NOT_EVEN_STARTED, means t
17 18
18 # The following intervals are specified in seconds, are expected to be sent as 19 # The following intervals are specified in seconds, are expected to be sent as
19 # arguments to time.sleep() 20 # arguments to time.sleep()
20 # All URLs are checked in sequence separated by 'short' interval seconds, to 21
21 # prevent possibly getting throttled by whatever endpoint gsutil or urllib are
22 # hitting.
23 SHORT_INTERVAL = 0.4
24 # If none of the URLs is determined to be ready, we sleep for a 'long' 22 # If none of the URLs is determined to be ready, we sleep for a 'long'
25 # interval. 23 # interval.
26 LONG_INTERVAL = 60 24 SLEEP_INTERVAL = 60
27 # We should check buildbot not more often than every 10 minutes. 25 # We should check buildbot not more often than every 10 minutes.
28 BUILDBOT_CHECK_FREQUENCY = 600 26 BUILDBOT_CHECK_INTERVAL = 600
29 # If the 'timeout' interval elapses without any URL becoming ready, we fail. 27
30 timeout_interval = 60 * 60
31 # Global gsutil path, expected to be set by main.
32 gsutil_path = ''
33 next_buildbot_check_due_time = 0 28 next_buildbot_check_due_time = 0
34 29
35 30
36 def _run_gsutil(cmd): 31 def _print_usage(argv):
37 # Sleep for a short time between gsutil calls 32 usage = 'Usage: %s <gsutil path> [--timeout=<seconds>]'
38 time.sleep(SHORT_INTERVAL) 33 print usage % argv[0]
39 cmd = [gsutil_path] + cmd 34 print 'main.__doc__'
40 try: 35 print main.__doc__
41 out = subprocess.check_output(cmd) 36 return NOT_EVEN_STARTED
42 return 0, out 37
43 except subprocess.CalledProcessError as cpe: 38
44 return cpe.returncode, cpe.output 39 def _gs_file_exists(gsutil_path, url):
45
46
47 def _gs_file_exists(url):
48 """Checks that running 'gsutil ls' returns 0 to see if file at url exists.""" 40 """Checks that running 'gsutil ls' returns 0 to see if file at url exists."""
49 return _run_gsutil(['ls', url])[0] == 0 41 cmd = [gsutil_path, 'ls', url]
42 error = subprocess.call(cmd, stderr=open(os.devnull, 'w'))
43 return not error
50 44
51 45
52 def _next_buildbot_check_due(): 46 def _next_buildbot_check_due():
47 """To limit how often we pull the [potentially big] json object from bb."""
53 global next_buildbot_check_due_time 48 global next_buildbot_check_due_time
54 if time.time() > next_buildbot_check_due_time: 49 if time.time() > next_buildbot_check_due_time:
55 next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_FREQUENCY 50 next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_INTERVAL
56 return True 51 return True
57 return False 52 return False
58 53
59 54
60 def _check_failed_buildbot_jobs(locations): 55 def _check_buildbot_jobs(jobs_to_check):
61 if not locations: 56 if not jobs_to_check:
62 return None 57 return None
63 jobs = {} 58 jobs = {}
64 for loc in locations: 59 completed_results = []
65 _, master, builder, job_name = loc.split(':', 3) 60 failed_results = []
61 # Mapping from job names to the original dictionary sent in jobs_to_check
62 entries = {}
63 for entry in jobs_to_check:
64 master = entry['master']
65 builder = entry['builder']
66 job_name = entry['job_name']
67 # Building a nested dictionary so that we check at most once per builder.
prasadv 2015/09/17 22:33:35 May be we should elaborate these comments.
RobertoCN 2015/09/19 00:32:54 Done.
66 jobs.setdefault(master, {}).setdefault(builder, []).append(job_name) 68 jobs.setdefault(master, {}).setdefault(builder, []).append(job_name)
69 entries[job_name] = entry
67 for master in jobs.keys(): 70 for master in jobs.keys():
68 for builder in jobs[master].keys(): 71 for builder in jobs[master].keys():
69 if check_buildbot.main(["check_buildbot", master, builder] 72 config = {
70 + jobs[master][builder]): 73 'master': master,
71 return 1 74 'builder': builder,
72 return 0 75 'job_names': jobs[master][builder],
76 }
77 builder_results = check_buildbot.main(config)
78 completed_results += builder_results.get('completed', [])
79 failed_results += builder_results.get('failed', [])
80 results = {}
81 if completed_results:
82 results['completed'] = [entries[k] for k in completed_results]
83 if failed_results:
84 results['failed'] = [entries[k] for k in failed_results]
85 return results
73 86
74 87
75 def main(argv): 88 def main(argv):
76 global timeout_interval 89 """Main function of the script.
90
91 The script expects the path to gsutil to be provided on the command line, and
92 a json object containing the details of the jobs to monitor on standard input.
93
94 Each job in the list should be one of the following types:
95 - GS location, which must at least contain:
96 - The "type" key set to the "gs" value.
97 - The "location" key, containing the location ("gs://...") of the gs
98 object to check.
99 - Buildbot job, which must at least contain:
100 - The "type" key set to the "buildbot" value.
101 - The "master" key containing the name of the appropriate master, e.g.
102 "chromium.perf".
prasadv 2015/09/17 22:33:35 May be tryserver.chromium.perf be more relevant
RobertoCN 2015/09/19 00:32:54 Done.
103 - The "builder" key set to the name of the builder performing the job.
104 - The "job_name" key containing the name of the job to check;
105 typically a uuid or a hash will be used.
106
107 The script will wait until the first of the following conditions becomes true:
108 - An object exists at one of the GS locations
109 - One of the buildbot jobs completes successfully
110 - One of the buildbot jobs fails
111 - Six days elapse from the invocation of the script. (The exact timeout may
112 be overridden from the command line)
113
114 The return code will be:
115 0 if a buildbot job succeeds or an object exists at the GS locations.
116 1 if a buildbot job fails
117 2 if the timeout (six days by default) is triggered.
prasadv 2015/09/17 22:33:35 What if the case where buildbot job succeeds but t
RobertoCN 2015/09/19 00:32:54 In that case it will still return 0. We won't wait
118
119 Additionally, a json object will be written to standard output containing the
120 results of the script.
121
122 Example of expected stdin:
123 {
124 "jobs": [
125 {
126 "type": "gs",
127 "location": "gs://chrome-perf/some_path/some_object.json"
128 },
129 {
130 "type": "buildbot",
131 "master": "tryserver.chromium.perf",
132 "builder": "linux_perf_bisect",
133 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
134 }
135 ]
136 }
137 EOF
138
139 Examples of results from stdout:
140 cat <<EOF #Successful result
141 {
142 "completed": [
143 {
144 "type": "buildbot",
145 "master": "tryserver.chromium.perf",
146 "builder": "linux_perf_bisect",
147 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
148 }
149 ]
150 }
151 EOF
152
153 cat <<EOF #Unsuccessful result
154 {
155 "failed": [
156 {
157 "type": "buildbot",
158 "master": "tryserver.chromium.perf",
159 "builder": "linux_perf_bisect",
160 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
161 }
162 ]
163 }
164 EOF
165 """
166 start_time = time.time()
167 # Default timeout: six days
168 timeout_interval = 6 * 24 * 60 * 60
77 if argv[-1].startswith('--timeout='): 169 if argv[-1].startswith('--timeout='):
78 timeout_interval = int(argv[-1].split('=')[1]) 170 timeout_interval = int(argv[-1].split('=')[1])
79 argv = argv[:-1] 171 argv = argv[:-1]
80 172
81 if len(argv) < 3: 173 jobs = json.loads(sys.stdin.read())['jobs']
82 usage = ('Usage: %s <gsutil path> url1 [url2 [url3...]]' 174 gs_jobs = [job for job in jobs if job['type'] == 'gs']
83 ' [--timeout=<seconds>]\n' 175 buildbot_jobs = [job for job in jobs if job['type'] == 'buildbot']
84 ' Where urls are either a google storage location for the result ' 176
85 ' file, or a buildbot location of the form ' 177 if ((not gs_jobs and not buildbot_jobs) or
86 '"bb:<master>:<builderi>:<job_name>".') 178 (gs_jobs and len(argv) < 2)):
87 print usage % argv[0] 179 return _print_usage(argv)
88 return 1 180
89 181 gsutil_path = argv[1] if gs_jobs else ''
90 list_of_urls = ', '.join(['<%s>' % url for url in argv[2:]]) 182
91 print 'Waiting for the following urls: ' + list_of_urls 183 while time.time() < start_time + timeout_interval:
92 global gsutil_path 184 # Checking GS jobs
93 start_time = time.time() 185 completed_jobs = []
94 gsutil_path = argv[1] 186 for job in gs_jobs:
95 urls = argv[2:] 187 if _gs_file_exists(gsutil_path, job['location']):
96 while urls: 188 completed_jobs.append(job)
97 for url in urls: 189 if completed_jobs:
98 if url.startswith('bb:'): 190 print json.dumps({'completed': completed_jobs})
99 pass 191 return COMPLETED
100 elif _gs_file_exists(url): 192
101 print 'Build finished: ', url 193 # Checking Buildbot jobs
102 return 0
103 if time.time() - start_time > timeout_interval:
104 print "Timed out waiting for: ", urls
105 return 1
106 if _next_buildbot_check_due(): 194 if _next_buildbot_check_due():
107 failed_job = _check_failed_buildbot_jobs( 195 buildbot_results = _check_buildbot_jobs(buildbot_jobs)
108 [url for url in urls if url.startswith('bb:')]) 196 if buildbot_results:
109 if failed_job: 197 print json.dumps(buildbot_results)
110 return 0 198 if 'completed' in buildbot_results and buildbot_results['completed']:
111 time.sleep(LONG_INTERVAL) 199 return COMPLETED
112 200 return FAILED
prasadv 2015/09/17 22:33:35 How are we handling pending jobs, I mean jobs in q
RobertoCN 2015/09/19 00:32:54 They are considered in progress, as they are neith
113 201
114 print "No jobs to check." 202 sys.stdout.write('.')
115 return 0 203 sys.stdout.flush()
116 204 time.sleep(SLEEP_INTERVAL)
205 return TIMED_OUT
117 206
118 if __name__ == '__main__': 207 if __name__ == '__main__':
119 sys.exit(main(sys.argv)) 208 sys.exit(main(sys.argv))
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698