Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: appengine/cr-buildbucket/service.py

Issue 1532713002: buildbucket: add monitoring using ts_mon (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2014 The Chromium Authors. All rights reserved. 1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import datetime 5 import datetime
6 import json 6 import json
7 import logging 7 import logging
8 import urlparse 8 import urlparse
9 from components import auth 9 from components import auth
10 from components import pubsub 10 from components import pubsub
11 from components import utils 11 from components import utils
12 from google.appengine.api import taskqueue 12 from google.appengine.api import taskqueue
13 from google.appengine.api import modules 13 from google.appengine.api import modules
14 from google.appengine.ext import db 14 from google.appengine.ext import db
15 from google.appengine.ext import deferred 15 from google.appengine.ext import deferred
16 from google.appengine.ext import ndb 16 from google.appengine.ext import ndb
17 import acl 17 import acl
18 import errors 18 import errors
19 import metrics
19 import model 20 import model
20 import swarming 21 import swarming
21 22
22 MAX_RETURN_BUILDS = 100 23 MAX_RETURN_BUILDS = 100
23 MAX_LEASE_DURATION = datetime.timedelta(hours=2) 24 MAX_LEASE_DURATION = datetime.timedelta(hours=2)
24 DEFAULT_LEASE_DURATION = datetime.timedelta(minutes=1) 25 DEFAULT_LEASE_DURATION = datetime.timedelta(minutes=1)
25 BUILD_TIMEOUT = datetime.timedelta(days=1) 26 BUILD_TIMEOUT = datetime.timedelta(days=1)
26 27
27 validate_bucket_name = errors.validate_bucket_name 28 validate_bucket_name = errors.validate_bucket_name
28 29
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
161 162
162 try: 163 try:
163 yield build.put_async() 164 yield build.put_async()
164 except: # pragma: no cover 165 except: # pragma: no cover
165 # Best effort. 166 # Best effort.
166 if for_swarming: 167 if for_swarming:
167 yield swarming.cancel_task_async(build) 168 yield swarming.cancel_task_async(build)
168 raise 169 raise
169 logging.info( 170 logging.info(
170 'Build %s was created by %s', build.key.id(), identity.to_bytes()) 171 'Build %s was created by %s', build.key.id(), identity.to_bytes())
172 metrics.increment(metrics.CREATE_COUNT, build)
171 173
172 if client_operation_id is not None: 174 if client_operation_id is not None:
173 yield ctx.memcache_set(client_operation_cache_key, build.key.id(), 60) 175 yield ctx.memcache_set(client_operation_cache_key, build.key.id(), 60)
174 raise ndb.Return(build) 176 raise ndb.Return(build)
175 177
176 178
177 def add(*args, **kwargs): 179 def add(*args, **kwargs):
178 """Sync version of add_async.""" 180 """Sync version of add_async."""
179 return add_async(*args, **kwargs).get_result() 181 return add_async(*args, **kwargs).get_result()
180 182
(...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after
381 build = _get_leasable_build(build_id) 383 build = _get_leasable_build(build_id)
382 384
383 if build.status != model.BuildStatus.SCHEDULED or build.is_leased: 385 if build.status != model.BuildStatus.SCHEDULED or build.is_leased:
384 return False, build 386 return False, build
385 387
386 build.lease_expiration_date = lease_expiration_date 388 build.lease_expiration_date = lease_expiration_date
387 build.regenerate_lease_key() 389 build.regenerate_lease_key()
388 build.leasee = auth.get_current_identity() 390 build.leasee = auth.get_current_identity()
389 build.never_leased = False 391 build.never_leased = False
390 build.put() 392 build.put()
393 return True, build
394
395 leased, build = try_lease()
396 if leased:
391 logging.info( 397 logging.info(
392 'Build %s was leased by %s', build.key.id(), build.leasee.to_bytes()) 398 'Build %s was leased by %s', build.key.id(), build.leasee.to_bytes())
393 return True, build 399 metrics.increment(metrics.LEASE_COUNT, build)
394 400 return leased, build
395 return try_lease()
396 401
397 402
398 def _check_lease(build, lease_key): 403 def _check_lease(build, lease_key):
399 if lease_key != build.lease_key: 404 if lease_key != build.lease_key:
400 raise errors.LeaseExpiredError( 405 raise errors.LeaseExpiredError(
401 'lease_key for build %s is incorrect. Your lease might be expired.' % 406 'lease_key for build %s is incorrect. Your lease might be expired.' %
402 build.key.id()) 407 build.key.id())
403 408
404 409
405 @ndb.transactional 410 @ndb.transactional
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
449 build.key.id(), 454 build.key.id(),
450 build.pubsub_callback.topic, 455 build.pubsub_callback.topic,
451 build.pubsub_callback.user_data, 456 build.pubsub_callback.user_data,
452 build.pubsub_callback.auth_token, 457 build.pubsub_callback.auth_token,
453 _transactional=True, 458 _transactional=True,
454 _retry_options=taskqueue.TaskRetryOptions( 459 _retry_options=taskqueue.TaskRetryOptions(
455 task_age_limit=BUILD_TIMEOUT.total_seconds()), 460 task_age_limit=BUILD_TIMEOUT.total_seconds()),
456 ) 461 )
457 462
458 463
459 @ndb.transactional
460 def start(build_id, lease_key, url=None): 464 def start(build_id, lease_key, url=None):
461 """Marks build as STARTED. Idempotent. 465 """Marks build as STARTED. Idempotent.
462 466
463 Args: 467 Args:
464 build_id: id of the started build. 468 build_id: id of the started build.
465 lease_key: current lease key. 469 lease_key: current lease key.
466 url (str): a URL to a build-system-specific build, viewable by a human. 470 url (str): a URL to a build-system-specific build, viewable by a human.
467 471
468 Returns: 472 Returns:
469 The updated Build. 473 The updated Build.
470 """ 474 """
471 validate_lease_key(lease_key) 475 validate_lease_key(lease_key)
472 validate_url(url) 476 validate_url(url)
473 build = _get_leasable_build(build_id)
474 477
475 if build.status == model.BuildStatus.STARTED: 478 @ndb.transactional
476 if build.url != url: 479 def txn():
Sergey Berezin 2015/12/17 00:08:01 nit: spell out (transaction?)
nodir 2015/12/17 01:28:54 txn is kind of standard, I see it a lot in code by
Sergey Berezin 2015/12/17 02:30:18 Acknowledged.
477 build.url = url 480 build = _get_leasable_build(build_id)
478 build.put() 481
482 if build.status == model.BuildStatus.STARTED:
483 if build.url != url:
484 build.url = url
485 build.put()
486 return build
487 elif build.status == model.BuildStatus.COMPLETED:
488 raise errors.BuildIsCompletedError('Cannot start a completed build')
489 assert build.status == model.BuildStatus.SCHEDULED
490 _check_lease(build, lease_key)
491
492 build.status = model.BuildStatus.STARTED
493 build.status_changed_time = utils.utcnow()
494 build.url = url
495 build.put()
496 _enqueue_callback_task_if_needed(build)
479 return build 497 return build
480 elif build.status == model.BuildStatus.COMPLETED:
481 raise errors.BuildIsCompletedError('Cannot start a completed build')
482 assert build.status == model.BuildStatus.SCHEDULED
483 _check_lease(build, lease_key)
484 498
485 build.status = model.BuildStatus.STARTED 499 build = txn()
486 build.status_changed_time = utils.utcnow()
487 build.url = url
488 build.put()
489 logging.info('Build %s was started. URL: %s', build.key.id(), url) 500 logging.info('Build %s was started. URL: %s', build.key.id(), url)
490 _enqueue_callback_task_if_needed(build) 501 metrics.increment(metrics.START_COUNT, build)
491 return build 502 return build
492 503
493 504
494 @ndb.transactional_tasklet 505 @ndb.tasklet
495 def heartbeat_async(build_id, lease_key, lease_expiration_date): 506 def heartbeat_async(build_id, lease_key, lease_expiration_date):
496 """Extends build lease. 507 """Extends build lease.
497 508
498 Args: 509 Args:
499 build_id: id of the build. 510 build_id: id of the build.
500 lease_key: current lease key. 511 lease_key: current lease key.
501 lease_expiration_date (datetime.timedelta): new lease expiration date. 512 lease_expiration_date (datetime.timedelta): new lease expiration date.
502 513
503 Returns: 514 Returns:
504 The updated Build as Future. 515 The updated Build as Future.
505 """ 516 """
506 try: 517 @ndb.transactional_tasklet
518 def txn():
507 validate_lease_key(lease_key) 519 validate_lease_key(lease_key)
508 if lease_expiration_date is None: 520 if lease_expiration_date is None:
509 raise errors.InvalidInputError('Lease expiration date not specified') 521 raise errors.InvalidInputError('Lease expiration date not specified')
510 validate_lease_expiration_date(lease_expiration_date) 522 validate_lease_expiration_date(lease_expiration_date)
511 build = yield model.Build.get_by_id_async(build_id) 523 build = yield model.Build.get_by_id_async(build_id)
512 if build is None: 524 if build is None:
513 raise errors.BuildNotFoundError() 525 raise errors.BuildNotFoundError()
514 if build.status == model.BuildStatus.COMPLETED: 526 if build.status == model.BuildStatus.COMPLETED:
515 raise errors.BuildIsCompletedError() 527 raise errors.BuildIsCompletedError()
516 _check_lease(build, lease_key) 528 _check_lease(build, lease_key)
517 build.lease_expiration_date = lease_expiration_date 529 build.lease_expiration_date = lease_expiration_date
518 yield build.put_async() 530 yield build.put_async()
531 raise ndb.Return(build)
532
533 build = None
534 try:
535 build = yield txn()
519 except Exception as ex: 536 except Exception as ex:
520 logging.warning('Heartbeat for build %s failed: %s', build_id, ex) 537 logging.warning('Heartbeat for build %s failed: %s', build_id, ex)
538 metrics.increment(metrics.HEARTBEAT_FAILURE_COUNT, build)
Sergey Berezin 2015/12/17 00:08:01 nit: as an option, you might consider counting all
nodir 2015/12/17 01:28:54 hearbeats are sent in batches and heartbeat_async
Sergey Berezin 2015/12/17 02:30:18 If DeferredMetricStore is not initialized (as is t
nodir 2015/12/17 03:19:56 Done.
521 raise 539 raise
522 raise ndb.Return(build) 540 raise ndb.Return(build)
523 541
524 542
525 def heartbeat(build_id, lease_key, lease_expiration_date): 543 def heartbeat(build_id, lease_key, lease_expiration_date):
526 future = heartbeat_async(build_id, lease_key, lease_expiration_date) 544 future = heartbeat_async(build_id, lease_key, lease_expiration_date)
527 return future.get_result() 545 return future.get_result()
528 546
529 547
530 def heartbeat_batch(heartbeats): 548 def heartbeat_batch(heartbeats):
(...skipping 12 matching lines...) Expand all
543 build_id = hb['build_id'] 561 build_id = hb['build_id']
544 exc = future.get_exception() 562 exc = future.get_exception()
545 if not exc: 563 if not exc:
546 return build_id, future.get_result(), None 564 return build_id, future.get_result(), None
547 else: 565 else:
548 return build_id, None, exc 566 return build_id, None, exc
549 567
550 return [get_result(h, f) for h, f in futures] 568 return [get_result(h, f) for h, f in futures]
551 569
552 570
553 @ndb.transactional
554 def _complete( 571 def _complete(
555 build_id, lease_key, result, result_details, failure_reason=None, 572 build_id, lease_key, result, result_details, failure_reason=None,
556 url=None): 573 url=None):
557 """Marks a build as completed. Used by succeed and fail methods.""" 574 """Marks a build as completed. Used by succeed and fail methods."""
558 validate_lease_key(lease_key) 575 validate_lease_key(lease_key)
559 validate_url(url) 576 validate_url(url)
560 assert result in (model.BuildResult.SUCCESS, model.BuildResult.FAILURE) 577 assert result in (model.BuildResult.SUCCESS, model.BuildResult.FAILURE)
561 build = _get_leasable_build(build_id)
562 578
563 if build.status == model.BuildStatus.COMPLETED: 579 @ndb.transactional
564 if (build.result == result and 580 def txn():
565 build.failure_reason == failure_reason and 581 build = _get_leasable_build(build_id)
566 build.result_details == result_details and
567 build.url == url):
568 return build
569 raise errors.BuildIsCompletedError(
570 'Build %s has already completed' % build_id)
571 _check_lease(build, lease_key)
572 582
573 build.status = model.BuildStatus.COMPLETED 583 if build.status == model.BuildStatus.COMPLETED:
574 build.status_changed_time = utils.utcnow() 584 if (build.result == result and
575 build.complete_time = utils.utcnow() 585 build.failure_reason == failure_reason and
576 build.result = result 586 build.result_details == result_details and
577 if url is not None: # pragma: no branch 587 build.url == url):
578 build.url = url 588 return build
579 build.result_details = result_details 589 raise errors.BuildIsCompletedError(
580 build.failure_reason = failure_reason 590 'Build %s has already completed' % build_id)
581 build.clear_lease() 591 _check_lease(build, lease_key)
582 build.put() 592
593 build.status = model.BuildStatus.COMPLETED
594 build.status_changed_time = utils.utcnow()
595 build.complete_time = utils.utcnow()
596 build.result = result
597 if url is not None: # pragma: no branch
598 build.url = url
599 build.result_details = result_details
600 build.failure_reason = failure_reason
601 build.clear_lease()
602 build.put()
603 _enqueue_callback_task_if_needed(build)
604 return build
605
606 build = txn()
583 logging.info( 607 logging.info(
584 'Build %s was completed. Status: %s. Result: %s', 608 'Build %s was completed. Status: %s. Result: %s',
585 build.key.id(), build.status, build.result) 609 build.key.id(), build.status, build.result)
586 _enqueue_callback_task_if_needed(build) 610 metrics.increment(
611 metrics.COMPLETE_COUNT,
612 build,
613 result=result,
614 failure_reason=failure_reason,
615 )
587 return build 616 return build
588 617
589 618
590 def succeed(build_id, lease_key, result_details=None, url=None): 619 def succeed(build_id, lease_key, result_details=None, url=None):
591 """Marks a build as succeeded. Idempotent. 620 """Marks a build as succeeded. Idempotent.
592 621
593 Args: 622 Args:
594 build_id: id of the build to complete. 623 build_id: id of the build to complete.
595 lease_key: current lease key. 624 lease_key: current lease key.
596 result_details (dict): build result description. 625 result_details (dict): build result description.
(...skipping 19 matching lines...) Expand all
616 645
617 Returns: 646 Returns:
618 The failed Build. 647 The failed Build.
619 """ 648 """
620 failure_reason = failure_reason or model.FailureReason.BUILD_FAILURE 649 failure_reason = failure_reason or model.FailureReason.BUILD_FAILURE
621 return _complete( 650 return _complete(
622 build_id, lease_key, model.BuildResult.FAILURE, result_details, 651 build_id, lease_key, model.BuildResult.FAILURE, result_details,
623 failure_reason, url=url) 652 failure_reason, url=url)
624 653
625 654
626 @ndb.transactional
627 def cancel(build_id): 655 def cancel(build_id):
628 """Cancels build. Does not require a lease key. 656 """Cancels build. Does not require a lease key.
629 657
630 The current user has to have a permission to cancel a build in the 658 The current user has to have a permission to cancel a build in the
631 bucket. 659 bucket.
632 660
633 Returns: 661 Returns:
634 Canceled Build. 662 Canceled Build.
635 """ 663 """
636 build = model.Build.get_by_id(build_id) 664 @ndb.transactional
637 if build is None: 665 def txn():
638 raise errors.BuildNotFoundError() 666 build = model.Build.get_by_id(build_id)
639 if not acl.can_cancel_build(build): 667 if build is None:
640 raise current_identity_cannot('cancel build %s', build.key.id()) 668 raise errors.BuildNotFoundError()
641 if build.status == model.BuildStatus.COMPLETED: 669 if not acl.can_cancel_build(build):
642 if build.result == model.BuildResult.CANCELED: 670 raise current_identity_cannot('cancel build %s', build.key.id())
643 return build 671 if build.status == model.BuildStatus.COMPLETED:
644 raise errors.BuildIsCompletedError('Cannot cancel a completed build') 672 if build.result == model.BuildResult.CANCELED:
645 now = utils.utcnow() 673 return build
646 build.status = model.BuildStatus.COMPLETED 674 raise errors.BuildIsCompletedError('Cannot cancel a completed build')
647 build.status_changed_time = now 675 now = utils.utcnow()
648 build.result = model.BuildResult.CANCELED 676 build.status = model.BuildStatus.COMPLETED
649 build.cancelation_reason = model.CancelationReason.CANCELED_EXPLICITLY 677 build.status_changed_time = now
650 build.complete_time = now 678 build.result = model.BuildResult.CANCELED
651 build.clear_lease() 679 build.cancelation_reason = model.CancelationReason.CANCELED_EXPLICITLY
652 build.put() 680 build.complete_time = now
681 build.clear_lease()
682 build.put()
683 return build
684
685 build = txn()
653 logging.info( 686 logging.info(
654 'Build %s was cancelled by %s', build.key.id(), 687 'Build %s was cancelled by %s', build.key.id(),
655 auth.get_current_identity().to_bytes()) 688 auth.get_current_identity().to_bytes())
689 metrics.increment(
690 metrics.COMPLETE_COUNT,
691 build,
692 result=build.result,
693 cancelation_reason=build.cancelation_reason,
694 )
656 return build 695 return build
657 696
658 697
659 @ndb.transactional_tasklet 698 @ndb.tasklet
660 def _reset_expired_build_async(build_id): 699 def _reset_expired_build_async(build_id):
661 build = yield model.Build.get_by_id_async(build_id) 700 @ndb.transactional_tasklet
662 if not build or build.lease_expiration_date is None: # pragma: no cover 701 def txn():
663 return 702 build = yield model.Build.get_by_id_async(build_id)
664 is_expired = build.lease_expiration_date <= utils.utcnow() 703 if not build or build.lease_expiration_date is None: # pragma: no cover
665 if not is_expired: # pragma: no cover 704 return
666 return 705 is_expired = build.lease_expiration_date <= utils.utcnow()
706 if not is_expired: # pragma: no cover
707 return
667 708
668 assert build.status != model.BuildStatus.COMPLETED, ( 709 assert build.status != model.BuildStatus.COMPLETED, (
669 'Completed build is leased') 710 'Completed build is leased')
670 build.clear_lease() 711 build.clear_lease()
671 build.status = model.BuildStatus.SCHEDULED 712 build.status = model.BuildStatus.SCHEDULED
672 build.status_changed_time = utils.utcnow() 713 build.status_changed_time = utils.utcnow()
673 build.url = None 714 build.url = None
674 yield build.put_async() 715 yield build.put_async()
716 raise ndb.Return(build)
717
718 build = yield txn()
675 logging.info('Expired build %s was reset', build_id) 719 logging.info('Expired build %s was reset', build_id)
720 metrics.increment(metrics.LEASE_EXPIRATION_COUNT, build)
676 721
677 722
678 @ndb.transactional_tasklet 723 @ndb.transactional_tasklet
679 def _timeout_async(build_id): 724 def _timeout_async(build_id):
680 build = yield model.Build.get_by_id_async(build_id) 725 build = yield model.Build.get_by_id_async(build_id)
681 if not build or build.status == model.BuildStatus.COMPLETED: 726 if not build or build.status == model.BuildStatus.COMPLETED:
682 return # pragma: no cover 727 return # pragma: no cover
683 728
684 build.clear_lease() 729 build.clear_lease()
685 build.status = model.BuildStatus.COMPLETED 730 build.status = model.BuildStatus.COMPLETED
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
763 if isinstance(identity, basestring): 808 if isinstance(identity, basestring):
764 if not identity: # pragma: no cover 809 if not identity: # pragma: no cover
765 return None 810 return None
766 if ':' not in identity: # pragma: no branch 811 if ':' not in identity: # pragma: no branch
767 identity = 'user:%s' % identity 812 identity = 'user:%s' % identity
768 try: 813 try:
769 identity = auth.Identity.from_bytes(identity) 814 identity = auth.Identity.from_bytes(identity)
770 except ValueError as ex: 815 except ValueError as ex:
771 raise errors.InvalidInputError('Invalid identity identity: %s' % ex) 816 raise errors.InvalidInputError('Invalid identity identity: %s' % ex)
772 return identity 817 return identity
OLDNEW
« appengine/cr-buildbucket/metrics.py ('K') | « appengine/cr-buildbucket/module-backend.yaml ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698