| OLD | NEW |
| (Empty) |
| 1 # coding=utf8 | |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 """Sends patches to the Try server and reads back results. | |
| 6 | |
| 7 - TryJobs contains TryJob, one per try job on a builder. | |
| 8 - TryRunnerBase contains the common logic to send try jobs and respond to the | |
| 9 try job results. | |
| 10 - TryRunnerSvn uses svn plus /json polling on the try server for status updates. | |
| 11 """ | |
| 12 | |
| 13 import logging | |
| 14 import os | |
| 15 import re | |
| 16 import time | |
| 17 import urllib2 | |
| 18 | |
| 19 import find_depot_tools # pylint: disable=W0611 | |
| 20 import presubmit_support | |
| 21 import trychange | |
| 22 | |
| 23 import buildbot_json | |
| 24 import model | |
| 25 from verification import base | |
| 26 | |
| 27 | |
# We don't want to have trychange use gcl so block it.
trychange.gcl = None
# Hack out trychange logging.info(): replace the `logging` name inside the
# trychange module with a dedicated 'trychange' logger whose level is WARNING,
# so that only warnings and above coming from trychange are emitted.
trychange.logging = logging.getLogger('trychange')
trychange.logging.setLevel(logging.WARNING)
| 33 | |
| 34 | |
def or_3_way(a, b):
  """Returns the highest of two tri-state values, where True > False > None.

  None stands for "unknown" and is only returned when both arguments are None.

  A plain `a or b` is not sufficient: `False or None` evaluates to None, which
  would silently drop a known False when it is the first argument. None is
  therefore handled explicitly before falling back to `or`.
  """
  if a is None:
    return b
  if b is None:
    return a
  return a or b
| 38 | |
| 39 | |
def parse_gclient_rev(rev):
  """Strips the solution part off a gclient revision.

  Only the text following the last '@' is kept, so 'solution@1234' becomes
  '1234'. A value without '@' is returned as its str() form. None is passed
  through untouched.
  """
  if rev is not None:
    # rpartition() yields the whole string as the last item when there is no
    # '@' at all.
    return str(rev).rpartition('@')[2]
  return None
| 48 | |
| 49 | |
def unambiguous_revision(checkout, revision):
  """Tells whether |revision| designates a single revision for |checkout|.

  The solution prefix is stripped first. The kind of checkout is derived from
  the class name of |checkout|:
  - 'Svn' in the class name: a purely numeric revision is unambiguous.
  - 'Git' in the class name: a lowercase hexadecimal string of 5 to 20
    characters is unambiguous.

  Anything else (HEAD, a date, a branch name, an empty revision) is ambiguous.
  """
  stripped = parse_gclient_rev(revision)
  if not stripped:
    return False
  checkout_kind = checkout.__class__.__name__
  # GitSvn should accept revision numbers?
  if 'Svn' in checkout_kind and stripped.isdigit():
    return True
  looks_like_hash = re.match(r'^[a-f0-9]{5,20}$', stripped) is not None
  return looks_like_hash and 'Git' in checkout_kind
| 65 | |
| 66 | |
class TryJob(model.PersistentMixIn):
  """Represents a try job for a pending commit.

  This data can be regenerated by parsing all the try job names but it is a bit
  hard on the try server.

  TODO(maruel): Should use __getstate__(), __setstate__() and __reduce__().
  """
  # Class-level declarations of the members and their types. A tuple such as
  # (None, int) presumably means "either None or an int" -- confirm against
  # model.PersistentMixIn.

  # Name of the builder the job runs on.
  builder = unicode
  # Build number on the builder; None as long as the build was not located on
  # the try server.
  build = (None, int)
  # Revision the job is run against.
  revision = (None, int)
  # One of the buildbot_json result codes; None while the job has no result.
  result = (None, int)
  # time.time() value of when the job was sent.
  sent = float
  # Names of the steps that failed on the previous attempt.
  failed_steps = list
  # Whether the job is a clobber build.
  clobber = bool
  # Job name as sent to the try server.
  name = (None, unicode)
  # Number of retries for this configuration.
  tries = int
  # Tests to run on the builder.
  tests = list

  def __init__(self, **kwargs):
    """Creates the job; 'sent' defaults to the current time."""
    kwargs.setdefault('sent', time.time())
    super(TryJob, self).__init__(**kwargs)

  def get_state(self):
    """Maps the buildbot result code to a verifier state.

    Returns:
      base.SUCCEEDED for SUCCESS, WARNINGS and SKIPPED results, base.FAILED for
      FAILURE, EXCEPTION and RETRY results, base.PROCESSING when there is no
      result yet.
    """
    if self.result in (
        buildbot_json.SUCCESS, buildbot_json.WARNINGS, buildbot_json.SKIPPED):
      return base.SUCCEEDED
    if self.result in (
        buildbot_json.FAILURE, buildbot_json.EXCEPTION, buildbot_json.RETRY):
      return base.FAILED
    # Any other value than None is an unknown result code.
    assert self.result is None
    return base.PROCESSING
| 101 | |
| 102 | |
class TryJobs(base.IVerifierStatus):
  """The collection of try jobs sent for one specific patch."""
  # An array of TryJob objects.
  try_jobs = list
  # When NOTRY=true is specified.
  skipped = bool

  def get_state(self):
    """Aggregates the state of every try job into a single state.

    A skipped verification is a success, and no job at all means the
    verification is still processing. Otherwise the highest state among the
    jobs wins; this relies on the ordering of the state constants defined in
    base.
    """
    if self.skipped:
      return base.SUCCEEDED
    if self.try_jobs:
      job_states = set(job.get_state() for job in self.try_jobs)
      assert job_states.issubset(base.VALID_STATES)
      return max(job_states)
    return base.PROCESSING

  def why_not(self):
    """Provides no explanation; always returns None."""
    return None
| 121 | |
| 122 | |
def steps_quality(steps):
  """Summarizes a list of tri-state step results into a single value.

  Returns None when |steps| is empty or None, True when every item is either
  True or None, and False as soon as one item is anything else.
  """
  if not steps:
    return None
  for outcome in steps:
    if outcome not in (True, None):
      return False
  return True
| 127 | |
| 128 | |
class StepDb(object):
  """Calculates statistics about all steps for each revision.

  The statistics are computed from the builds that are already cached on the
  buildbot object (`builds.cached_children`).
  """
  max_cache = 200

  def __init__(self, builders, buildbot, checkout):
    """
    Arguments:
    - builders: names of the builders of interest.
    - buildbot: object exposing `builders[name].builds.cached_children`.
    - checkout: checkout object, only used to tell which revision formats are
      unambiguous.
    """
    self._need_full = True
    self.builders = builders
    self.buildbot = buildbot
    self.checkout = checkout

  def need_full(self):
    """Returns True the first time it is called, then False."""
    result = self._need_full
    self._need_full = False
    return result

  def revision_quality_builder_steps(self, builder, revision):
    """Calculates the occurrence of a successful step execution, for a specific
    builder, for builds on a specific revision.

    The return value is a tuple of two elements:
    1. array of True/False/None, one value per step in a build. True means the
       step passed at least once. False means the step always failed. None
       means the step never ran for this revision on this builder. The array
       itself is None when no build matched.
    2. Number of builds that ran on this builder at this revision, including
       the builds that were ignored because of a step count mismatch.
    """
    revision = str(revision)
    steps = None
    nb_builds = 0
    for build in self.buildbot.builders[builder].builds.cached_children:
      if parse_gclient_rev(build.revision) != revision:
        continue
      nb_builds += 1
      if steps and len(steps) != len(build.steps):
        # The number of steps differs from the previous builds, e.g. after a
        # master restart. One workaround is to key by name but it's not worth
        # the effort here; the build is ignored instead. The worst case is
        # that builds that could be considered good are ignored.
        logging.warning('Invalid build %s', build)
        continue
      if not steps:
        steps = [None] * len(build.steps)
      for step in build.steps:
        steps[step.number] = or_3_way(
            steps[step.number], step.simplified_result)
    return steps, nb_builds

  def last_good_revision_builder(self, builder):
    """Returns LKGR for this builder or None if no revision was found.

    For a single revision, for each step, make sure step either passed at least
    once or never ran.

    For example, if build 1 has [True, True, False, None] and build 2 has [True,
    False, True, None], the revision is known to be good since each step run
    either succeeded one time or never ran.
    """
    state = {}
    for build in self.buildbot.builders[builder].builds.cached_children:
      if not unambiguous_revision(self.checkout, build.revision):
        # Ignore all builds that don't use revision numbers. It could be
        # instead svn date format {2011-01-30}, 'HEAD', 'BASE', etc.
        continue
      build_rev = parse_gclient_rev(build.revision)
      # The first build seen for a revision determines the number of steps.
      results = state.setdefault(build_rev, [None] * len(build.steps))
      for step in build.steps:
        if len(results) <= step.number:
          # This build has more steps than the first one; ignore the extras.
          continue
        results[step.number] = or_3_way(
            results[step.number], step.simplified_result)

    # NOTE(review): int() raises ValueError for a hexadecimal git revision,
    # which unambiguous_revision() accepts for Git checkouts.
    good_revisions = [
      int(revision) for revision, results in state.items()
      if all(v in (True, None) for v in results)
    ]
    if not good_revisions:
      return None
    return max(good_revisions)
| 208 | |
| 209 | |
class TryRunnerBase(base.VerifierCheckout):
  """Stateless communication with a try server.

  Sends try jobs and reads try job status.

  Analysis goes as following:
  - compile step is not flaky. compile.py already takes care of most flakiness
    and clobber build is done by default. If compile step fails, try again with
    clobber=True
  - test steps are flaky and can be retried as necessary.

  Subclasses must implement _prepare(), _get_quality(), get_lkgr(),
  _send_job() and _update_statuses().
  """
  name = 'try server'

  # A try job sent this long ago (in seconds) and that hasn't started yet is
  # deemed to be lost.
  lost_try_job_delay = 15*60

  # Only updates a job status once every 60 seconds.
  update_latency = 60

  def __init__(
      self, context_obj, try_server_url, commit_user,
      builders_and_tests, ignored_steps, solution):
    """
    Arguments:
    - context_obj: passed as-is to the base class.
    - try_server_url: base url of the try server.
    - commit_user: email address the try jobs are sent as.
    - builders_and_tests: dict('builder': ['test1', 'test2']) of the try jobs
      to run for each patch.
    - ignored_steps: names of the steps whose failure is not held against a
      try job.
    - solution: gclient solution name, used as revision prefix.
    """
    super(TryRunnerBase, self).__init__(context_obj)
    self.commit_user = commit_user
    self.try_server_url = try_server_url
    self.builders_and_tests = builders_and_tests
    self.ignored_steps = set(ignored_steps)
    # Backdated so the first update_status() call is not throttled.
    self.last_update = time.time() - self.update_latency
    self.solution = solution

  def verify(self, pending):
    """Sends the try jobs for |pending| to the try server.

    The TryJob instances are stored in a TryJobs instance saved as
    pending.verifications[self.name]. Nothing is sent if jobs were already
    sent for this pending commit or if the description requests NOTRY=true.
    """
    jobs = pending.verifications.setdefault(self.name, TryJobs())
    if jobs.try_jobs:
      logging.warning(
          'Already tried jobs. Let it go. At worst, it\'ll time out soon.')
      return

    jobs.try_jobs = jobs.try_jobs or []
    if self._is_skip_try_job(pending):
      # Do not run try job for it.
      jobs.skipped = True
      return

    new_jobs = [
      TryJob(
          builder=builder,
          tests=self.builders_and_tests[builder],
          revision=pending.revision,
          clobber=False)
      for builder in sorted(self.builders_and_tests)
    ]
    jobs.try_jobs.extend(new_jobs)
    self._send_jobs(
        pending,
        new_jobs,
        False,
        self.builders_and_tests,
        unicode(pending.pending_name()))
    # Slightly postpone next check.
    self.last_update = min(
        time.time(), self.last_update + (self.update_latency / 4))

  def update_status(self, queue):
    """Grabs the current status of all try jobs and updates |queue|.

    Does nothing when |queue| is empty or when the previous update happened
    less than self.update_latency seconds ago.

    Note: it would be more efficient to be event based.
    """
    if not queue:
      logging.debug('The list is empty, nothing to do')
      return

    if time.time() - self.last_update < self.update_latency:
      logging.debug('TS: Throttling updates')
      return
    self.last_update = time.time()

    self._update_statuses(queue)

  def _send_jobs(
      self, pending, jobs, need_prepare, builders_and_tests, job_name):
    """Prepares the TryJob instances |jobs| and sends them to the try server.

    Sending try jobs is deferred to self._send_job().

    Arguments:
    - pending: pending_manager.Pending instance.
    - jobs: List of TryJob instances to be executed. They must all target
          distinct builders and share the same revision and clobber value.
    - need_prepare: The checkout needs to have the patch applied, e.g. this
          function is called from within update_status().
    - builders_and_tests: dict('builder': ['test1', 'test2']) for try jobs to
          run. Can be self.builders_and_tests or a smaller subset when retrying
          jobs.
    - job_name: Job name to use, may have suffix like "retry".

    Raises:
      base.DiscardPending if a job was already sent 4 times.
    """
    for job in jobs:
      job.tries = job.tries or 0
      job.tries += 1
      if job.tries > 4:
        raise base.DiscardPending(
            pending,
            ('The commit queue went berserk retrying too often for a\n'
             'seemingly flaky test. Builder is %s, revision is %s, job name\n'
             'was %s.') % (job.builder, job.revision, job_name))

    builders = sorted(job.builder for job in jobs)
    assert len(set(builders)) == len(builders)

    revision = set(job.revision for job in jobs)
    assert len(revision) == 1
    revision = revision.pop()

    clobber = set(job.clobber for job in jobs)
    assert len(clobber) == 1
    clobber = clobber.pop()

    # Reset the state of each job since it is about to be (re)sent.
    for job in jobs:
      job.result = None
      job.build = None
      job.name = job_name
      job.tests = builders_and_tests[job.builder]

    if need_prepare:
      self._prepare(pending, revision)
    self._send_job(pending, revision, clobber, builders_and_tests, job_name)
    for builder in builders:
      # Signal a new try job was sent.
      info = {
        'builder': builder,
        # Use the validated common value instead of whatever `job` happens to
        # be bound to after the previous loop.
        'clobber': clobber,
        'job_name': job_name,
        'revision': revision,
      }
      self.send_status(pending, info)
    for job in jobs:
      job.sent = time.time()

  def _build_status_url(self, job):
    """Html url for this try job. The build number of |job| must be known."""
    assert job.build is not None, str(job)
    return '%s/buildstatus?builder=%s&number=%s' % (
        self.try_server_url.rstrip('/'), job.builder, job.build)

  def _error_msg(self, name, job, failed_steps):
    """Constructs the error message, logs it at info level and returns it.

    Arguments:
    - name: job name to put in the message.
    - job: TryJob instance that failed.
    - failed_steps: names of the steps that failed on this attempt.
    """
    def steps_to_str(steps):
      if len(steps) > 1:
        return 'steps "%s"' % ', '.join(steps)
      elif steps:
        return 'step "%s"' % steps[0]
      else:
        return ''

    msg = u'Try job failure for %s on %s for %s' % (
        name, job.builder, steps_to_str(failed_steps))
    if job.clobber:
      msg += ' (clobber build)'
    msg += '.'
    if job.failed_steps:
      # job.failed_steps holds the failures of the previous attempt.
      msg += u'\nIt\'s a second try, previously, %s failed.' % (
          steps_to_str(job.failed_steps))
    msg += '\n%s' % self._build_status_url(job)
    logging.info(msg)
    return msg

  def _handle_try_job(self, pending, jobs, job, build):
    """Determines if the try job is a good signal to commit the patch.

    Does nothing while |build| is not completed. Otherwise the result is saved
    in |job| and, on failure, the job is either retried or the failure is
    recorded in jobs.error_message.

    Arguments:
    - pending: pending_manager.Pending instance the job belongs to.
    - jobs: TryJobs instance containing |job|.
    - job: TryJob instance to update; its build number must be known.
    - build: the build on the try server corresponding to |job|.
    """
    if build.simplified_result is None:
      # The build hasn't completed yet.
      return
    assert job.result is None
    assert job.build is not None
    job.result = build.result
    # Warning: This code assumes that steps do not abort build on failure.
    failed_steps = list(set(
        step.name for step in build.steps if step.simplified_result is False
        ) - self.ignored_steps)
    # If the failed steps are only ignored steps like update_scripts or
    # cleanup_temp, still consider the job as a success. As such, do not use
    # build.result.
    if (not failed_steps and
        all(build.steps[s].simplified_result for s in job.tests
            if s in build.steps.keys)):
      job.result = buildbot_json.SUCCESS

    # Signal to the dashboard a try job completed.
    info = {
      'build': build.number,
      'builder': job.builder,
      'duration': build.duration,
      'job_name': job.name,
      'result': job.result,
      'revision': job.revision,
      'url': self._build_status_url(job),
    }
    self.send_status(pending, info)

    if job.get_state() != base.FAILED:
      assert not failed_steps
      logging.info(u'Try job status for %s on %s: %s\n%s' % (
          job.name,
          job.builder,
          job.result,
          self._build_status_url(job)))
      return

    msg = self._error_msg(job.name, job, failed_steps)
    quality = self._get_quality(job.builder, int(job.revision))

    def fail(msg2):
      """Records the failure; the job is not retried."""
      jobs.error_message = '%s\n%s' % (msg, msg2)
      logging.info(jobs.error_message)
      job.failed_steps = failed_steps

    def retry(msg2, tests=None):
      """Retry a try job. Will use LKGR if quality is bad."""
      if not quality:
        lkgr = self.get_lkgr(job.builder)
        if lkgr is None:
          logging.error('lkgr should never be None.')
          fail('Couldn\'t find a good revision, aborting.')
          return
        job.revision = lkgr
      logging.info(
          'Retrying %s on %s, %s; rev=%s; %s' %
          (job.name, job.builder, str(tests), job.revision, msg2))
      job.failed_steps = failed_steps
      tests = tests or job.tests
      self._send_jobs(
          pending, [job], True, {job.builder: tests}, u'%s (retry)' % job.name)

    if 'update' in failed_steps:
      # Look at update quality specifically since it's a special step.
      return fail(
          '\nStep "update" is always a major failure.\n'
          'Look at the try server FAQ for more details.')

    if 'compile' in failed_steps:
      if not job.clobber:
        # Note: this resets previous test failure if there has been on the
        # second previous try. This is fine since a slave could be broken.
        job.clobber = True
        return retry('retry compile with clobber')

      return fail('')

    if quality:
      if job.failed_steps:
        # The job had already failed.
        return fail('')

      return retry('Quality but first try', failed_steps)

    # TODO(maruel): It would make sense to do a clobber build to see if the
    # revision is indeed broken, since this algorithm assumes that the try
    # server is continuously used for recent revisions!
    # The revision looks like it's broken, retry with lkgr instead.
    return retry('No quality, no idea', failed_steps)

  @staticmethod
  def _is_skip_try_job(pending):
    """Returns True if a description contains NOTRY=true.

    The flag must start a line of pending.description. The value is compared
    case-insensitively and surrounding whitespace is ignored, so that a line
    ending with '\\r' or trailing spaces is still honored. Always returns a
    bool.
    """
    match = re.search(r'^NOTRY=(.*)$', pending.description, re.MULTILINE)
    return bool(match) and match.group(1).strip().lower() == 'true'

  def _prepare(self, pending, revision):
    """Prepares the checkout by applying the patch."""
    raise NotImplementedError()

  def _get_quality(self, builder, revision):
    """Gets quality about a revision job."""
    raise NotImplementedError()

  def get_lkgr(self, builder):
    """Gets the last known good revision."""
    raise NotImplementedError()

  def _send_job(self, pending, revision, clobber, builders_and_tests, job_name):
    """Sends a try job."""
    raise NotImplementedError()

  def _update_statuses(self, queue):
    """Updates TryJob status for all the Pending instances in the queue.

    Calls to this function are throttled.
    """
    raise NotImplementedError()
| 498 | |
| 499 | |
class TryRunnerSvn(TryRunnerBase):
  """Uses SVN to send the try job.

  Keeps a database of steps for each revision for each builder that ever passed,
  to know if it is possible for a step to pass. When unsure, it sends an empty
  build for the said revision to determine if the revision is simply broken.

  TODO(maruel): Ask the main server for details? Still doesn't cover well flaky
  tests.
  """
  def __init__(
      self, context_obj, try_server_url, commit_user,
      builders_and_tests, ignored_steps, solution,
      extra_flags, lkgr):
    """
    Arguments (in addition to the ones of TryRunnerBase):
    - extra_flags: additional command line flags passed to trychange; may be
      None.
    - lkgr: callable without argument returning a last known good revision.
    """
    super(TryRunnerSvn, self).__init__(
        context_obj, try_server_url, commit_user,
        builders_and_tests, ignored_steps, solution)
    self.status = buildbot_json.Buildbot(self.try_server_url)
    self.step_db = StepDb(
        self.builders_and_tests.keys(), self.status, self.context.checkout)
    self.extra_flags = extra_flags or []
    self.lkgr = lkgr

  def _prepare(self, pending, revision):
    """Running from inside update_status(), the patch wasn't applied. Do it now.
    """
    pending.revision = revision
    pending.apply_patch(self.context, True)

  def _get_quality(self, builder, revision):
    """Returns the steps_quality() of |revision| on |builder|."""
    steps, _ = self.step_db.revision_quality_builder_steps(builder, revision)
    return steps_quality(steps)

  def get_lkgr(self, builder):
    """Returns the highest of the builder's own LKGR and self.lkgr()."""
    # NOTE: last_good_revision_builder() may return None; this relies on
    # Python 2 ordering, where None compares lower than any number.
    return max(self.step_db.last_good_revision_builder(builder), self.lkgr())

  def _send_job(self, pending, revision, clobber, builders_and_tests, job_name):
    """Sends a try job by calling trychange.TryChange().

    The call is done with the checkout's project path as current directory;
    the previous current directory is always restored.

    Raises:
      base.DiscardPending if trychange exits with SystemExit.
    """
    assert revision
    cmd = [
      '--no_search',
      '--revision', '%s@%s' % (self.solution, revision),
      '--name', job_name,
      '--user', self.commit_user.split('@', 1)[0],
      '--email', self.commit_user,
      '--rietveld_url', self._patch_url(pending),
      '--issue', str(pending.issue),
      '--patchset', str(pending.patchset)
    ]
    cmd.extend(self.extra_flags)
    for builder in sorted(builders_and_tests):
      cmd.append('--bot')
      tests = builders_and_tests[builder]
      if tests:
        cmd.append('%s:%s' % (builder, ','.join(tests)))
      else:
        cmd.append(builder)
    if clobber:
      cmd.append('--clobber')
    # TODO(maruel): use GitChange when relevant.
    change = presubmit_support.SvnChange(
        job_name,
        pending.description,
        self.context.checkout.project_path,
        [('M', f) for f in pending.files],
        pending.issue,
        pending.patchset,
        pending.owner)
    prev_dir = os.getcwd()
    try:
      os.chdir(self.context.checkout.project_path)
      trychange.TryChange(
          cmd,
          change,
          swallow_exception=True)
    except SystemExit as e:
      logging.error(
          '_send_job(%s, %s, %s, %s, %s) failed!' % (
            pending.pending_name(), revision, clobber, builders_and_tests,
            job_name))
      raise base.DiscardPending(
          pending,
          'Failed to send try job %s: %s' % (job_name, e))
    finally:
      os.chdir(prev_dir)

  def _reset_cache(self, queue):
    """Resets the cache of self.status and self.step_db so the next requests
    are more efficient.

    Whole builders are cached for jobs whose build number is not known yet (or
    for every builder the first time this is called); only the specific builds
    are cached for the other jobs still processing.
    """
    self.status.discard()

    jobs_to_update = []
    for _, jobs in self.loop(queue, TryJobs, True):
      jobs_to_update.extend(
          job for job in jobs.try_jobs if job.get_state() == base.PROCESSING)

    # First determine what data is needed.
    builds_to_cache = {}
    if self.step_db.need_full():
      logging.info('Fetching all try jobs status to fetch good revisions')
      builders_to_cache = self.builders_and_tests.keys()
    else:
      builders_to_cache = set()
      # Every job in jobs_to_update is known to be in PROCESSING state.
      for job in jobs_to_update:
        if job.build is None:
          builders_to_cache.add(job.builder)
        else:
          builds_to_cache.setdefault(job.builder, []).append(job.build)

    # Simplify testing.
    builders_to_cache = sorted(builders_to_cache)

    # Reduce the number of requests by caching all the needed builders in one
    # shot when some jobs weren't started yet.
    if builders_to_cache:
      self.status.builders.cache_partial(builders_to_cache)

    for builder in builders_to_cache:
      self.status.builders[builder].builds.cache()
      # Filter out jobs that were retrieved.
      builds_to_cache.pop(builder, None)

    # Cache remaining builds. Sort to make testing simpler.
    for builder in sorted(builds_to_cache):
      self.status.builders[builder].builds.cache_partial(
          builds_to_cache[builder])

  def _update_statuses(self, queue):
    """Refreshes the cache then updates every try job still processing."""
    self._reset_cache(queue)
    for pending, jobs in self.loop(queue, TryJobs, True):
      for job in jobs.try_jobs:
        if job.get_state() != base.PROCESSING:
          continue
        self._update_status(pending, jobs, job)

  def _update_status(self, pending, jobs, job):
    """Updates a single try job. There's one TryJob per builder.

    Looks up the build for |job|, hands it over to _handle_try_job() when
    found, otherwise resends the job if it was sent more than
    self.lost_try_job_delay seconds ago and isn't pending on the builder.
    """
    # TODO(maruel): There should be differentiation when there's multiple
    # jobs for a single builder.
    build = None
    try:
      if job.build is None:
        build = self._find_job(job)
        if build:
          # Signal a try job was found.
          info = {
            'build': build.number,
            'builder': job.builder,
            'job_name': job.name,
            'revision': job.revision,
            'url': self._build_status_url(job),
          }
          self.send_status(pending, info)
      else:
        try:
          build = self.status.builders[job.builder].builds[job.build]
        except KeyError:
          # May happen when there is a huge backlog and the build is not
          # cached anymore.
          build = None
    except urllib2.HTTPError as e:
      logging.error(str(e))
      return

    if build is not None:
      self._handle_try_job(pending, jobs, job, build)
    else:
      # A job needs to be sent again if it has been sent more than
      # self.lost_try_job_delay ago.
      builder = self.status.builders[job.builder]
      pending_builds = builder.data.get('pendingBuilds', 0)
      if (time.time() - job.sent) > self.lost_try_job_delay:
        if pending_builds:
          job_names = [
            data.get('reason', '') for data in builder.pending_builds.data
          ]
          if job.name in job_names:
            # It's pending, move on.
            return

        # The job went to /dev/null. For example, the master may have
        # restarted, the svn server may have a fluke, network may have had a
        # short downtime, etc. Delete the previous job.
        # Resend exactly the same job.
        tests = job.tests
        if not tests:
          if job.builder not in self.builders_and_tests:
            # This means the builder was removed. Skip it.
            logging.warning(
                ( 'Wanted to retry %s but it\'s not a requirement anymore. '
                  'Ignoring it!') % job.builder)
            job.result = buildbot_json.SKIPPED
            return

          tests = self.builders_and_tests[job.builder]
        self._send_jobs(
            pending,
            [job],
            True,
            {job.builder:tests},
            u'%s (previous was lost)' % job.name)

  def _find_job(self, job):
    """Searches on the try server if the try job for |job| has started.

    A build matches when its reason is the job name, its revision is
    'solution@revision' and its blame list is exactly [self.commit_user].

    Returns:
      The matching build, after having saved its number in job.build, or None.
    """
    revision = '%s@%s' % (self.solution, job.revision)
    # TODO(maruel): Strip this off.
    job_name = job.name.split(':', 1)[-1]
    logging.debug('Searching for job.reason = %s @ %s' % (job_name, revision))
    for build in self.status.builders[job.builder].builds:
      blame = build.data.get('blame', [])
      logging.debug(
          'Build.reason = %s @ %s; blame: %s' % (
            build.reason, build.revision, ','.join(blame)))
      if (build.reason == job_name and
          str(build.revision) == revision and
          len(blame) == 1 and
          blame[0] == self.commit_user):
        # Note the build number to remember it started.
        logging.info('Found build %d for job %s' % (build.number, job_name))
        job.build = build.number
        return build
    return None

  def _patch_url(self, pending):
    """Returns the url of the raw diff of the patchset on Rietveld."""
    return ('%s/download/issue%d_%d.diff' %
        (self.context.rietveld.url, pending.issue, pending.patchset))
| OLD | NEW |