| OLD | NEW |
| (Empty) |
| 1 """A LatentSlave that uses EC2 to instantiate the slaves on demand. | |
| 2 | |
| 3 Tested with Python boto 1.5c | |
| 4 """ | |
| 5 | |
| 6 # Portions copyright Canonical Ltd. 2009 | |
| 7 | |
| 8 import cStringIO | |
| 9 import os | |
| 10 import re | |
| 11 import time | |
| 12 import urllib | |
| 13 | |
| 14 import boto | |
| 15 import boto.exception | |
| 16 from twisted.internet import defer, threads | |
| 17 from twisted.python import log | |
| 18 | |
| 19 from buildbot.buildslave import AbstractLatentBuildSlave | |
| 20 from buildbot import interfaces | |
| 21 | |
| 22 PENDING = 'pending' | |
| 23 RUNNING = 'running' | |
| 24 SHUTTINGDOWN = 'shutting-down' | |
| 25 TERMINATED = 'terminated' | |
| 26 | |
| 27 class EC2LatentBuildSlave(AbstractLatentBuildSlave): | |
| 28 | |
| 29 instance = image = None | |
| 30 _poll_resolution = 5 # hook point for tests | |
| 31 | |
| 32 def __init__(self, name, password, instance_type, ami=None, | |
| 33 valid_ami_owners=None, valid_ami_location_regex=None, | |
| 34 elastic_ip=None, identifier=None, secret_identifier=None, | |
| 35 aws_id_file_path=None, user_data=None, | |
| 36 keypair_name='latent_buildbot_slave', | |
| 37 security_name='latent_buildbot_slave', | |
| 38 max_builds=None, notify_on_missing=[], missing_timeout=60*20, | |
| 39 build_wait_timeout=60*10, properties={}): | |
| 40 AbstractLatentBuildSlave.__init__( | |
| 41 self, name, password, max_builds, notify_on_missing, | |
| 42 missing_timeout, build_wait_timeout, properties) | |
| 43 if not ((ami is not None) ^ | |
| 44 (valid_ami_owners is not None or | |
| 45 valid_ami_location_regex is not None)): | |
| 46 raise ValueError( | |
| 47 'You must provide either a specific ami, or one or both of ' | |
| 48 'valid_ami_location_regex and valid_ami_owners') | |
| 49 self.ami = ami | |
| 50 if valid_ami_owners is not None: | |
| 51 if isinstance(valid_ami_owners, (int, long)): | |
| 52 valid_ami_owners = (valid_ami_owners,) | |
| 53 else: | |
| 54 for element in valid_ami_owners: | |
| 55 if not isinstance(element, (int, long)): | |
| 56 raise ValueError( | |
| 57 'valid_ami_owners should be int or iterable ' | |
| 58 'of ints', element) | |
| 59 if valid_ami_location_regex is not None: | |
| 60 if not isinstance(valid_ami_location_regex, basestring): | |
| 61 raise ValueError( | |
| 62 'valid_ami_location_regex should be a string') | |
| 63 else: | |
| 64 # verify that regex will compile | |
| 65 re.compile(valid_ami_location_regex) | |
| 66 self.valid_ami_owners = valid_ami_owners | |
| 67 self.valid_ami_location_regex = valid_ami_location_regex | |
| 68 self.instance_type = instance_type | |
| 69 self.keypair_name = keypair_name | |
| 70 self.security_name = security_name | |
| 71 self.user_data = user_data | |
| 72 if identifier is None: | |
| 73 assert secret_identifier is None, ( | |
| 74 'supply both or neither of identifier, secret_identifier') | |
| 75 if aws_id_file_path is None: | |
| 76 home = os.environ['HOME'] | |
| 77 aws_id_file_path = os.path.join(home, '.ec2', 'aws_id') | |
| 78 if not os.path.exists(aws_id_file_path): | |
| 79 raise ValueError( | |
| 80 "Please supply your AWS access key identifier and secret " | |
| 81 "access key identifier either when instantiating this %s " | |
| 82 "or in the %s file (on two lines).\n" % | |
| 83 (self.__class__.__name__, aws_id_file_path)) | |
| 84 aws_file = open(aws_id_file_path, 'r') | |
| 85 try: | |
| 86 identifier = aws_file.readline().strip() | |
| 87 secret_identifier = aws_file.readline().strip() | |
| 88 finally: | |
| 89 aws_file.close() | |
| 90 else: | |
| 91 assert aws_id_file_path is None, \ | |
| 92 'if you supply the identifier and secret_identifier, ' \ | |
| 93 'do not specify the aws_id_file_path' | |
| 94 assert secret_identifier is not None, \ | |
| 95 'supply both or neither of identifier, secret_identifier' | |
| 96 # Make the EC2 connection. | |
| 97 self.conn = boto.connect_ec2(identifier, secret_identifier) | |
| 98 | |
| 99 # Make a keypair | |
| 100 # | |
| 101 # We currently discard the keypair data because we don't need it. | |
| 102 # If we do need it in the future, we will always recreate the keypairs | |
| 103 # because there is no way to | |
| 104 # programmatically retrieve the private key component, unless we | |
| 105 # generate it and store it on the filesystem, which is an unnecessary | |
| 106 # usage requirement. | |
| 107 try: | |
| 108 key_pair = self.conn.get_all_key_pairs(keypair_name)[0] | |
| 109 # key_pair.delete() # would be used to recreate | |
| 110 except boto.exception.EC2ResponseError, e: | |
| 111 if e.code != 'InvalidKeyPair.NotFound': | |
| 112 if e.code == 'AuthFailure': | |
| 113 print ('POSSIBLE CAUSES OF ERROR:\n' | |
| 114 ' Did you sign up for EC2?\n' | |
| 115 ' Did you put a credit card number in your AWS ' | |
| 116 'account?\n' | |
| 117 'Please doublecheck before reporting a problem.\n') | |
| 118 raise | |
| 119 # make one; we would always do this, and stash the result, if we | |
| 120 # needed the key (for instance, to SSH to the box). We'd then | |
| 121 # use paramiko to use the key to connect. | |
| 122 self.conn.create_key_pair(keypair_name) | |
| 123 | |
| 124 # create security group | |
| 125 try: | |
| 126 group = self.conn.get_all_security_groups(security_name)[0] | |
| 127 except boto.exception.EC2ResponseError, e: | |
| 128 if e.code == 'InvalidGroup.NotFound': | |
| 129 self.security_group = self.conn.create_security_group( | |
| 130 security_name, | |
| 131 'Authorization to access the buildbot instance.') | |
| 132 # Authorize the master as necessary | |
| 133 # TODO this is where we'd open the hole to do the reverse pb | |
| 134 # connect to the buildbot | |
| 135 # ip = urllib.urlopen( | |
| 136 # 'http://checkip.amazonaws.com').read().strip() | |
| 137 # self.security_group.authorize('tcp', 22, 22, '%s/32' % ip) | |
| 138 # self.security_group.authorize('tcp', 80, 80, '%s/32' % ip) | |
| 139 else: | |
| 140 raise | |
| 141 | |
| 142 # get the image | |
| 143 if self.ami is not None: | |
| 144 self.image = self.conn.get_image(self.ami) | |
| 145 else: | |
| 146 # verify we have access to at least one acceptable image | |
| 147 discard = self.get_image() | |
| 148 | |
| 149 # get the specified elastic IP, if any | |
| 150 if elastic_ip is not None: | |
| 151 elastic_ip = self.conn.get_all_addresses([elastic_ip])[0] | |
| 152 self.elastic_ip = elastic_ip | |
| 153 | |
| 154 def get_image(self): | |
| 155 if self.image is not None: | |
| 156 return self.image | |
| 157 if self.valid_ami_location_regex: | |
| 158 level = 0 | |
| 159 options = [] | |
| 160 get_match = re.compile(self.valid_ami_location_regex).match | |
| 161 for image in self.conn.get_all_images( | |
| 162 owners=self.valid_ami_owners): | |
| 163 # gather sorting data | |
| 164 match = get_match(image.location) | |
| 165 if match: | |
| 166 alpha_sort = int_sort = None | |
| 167 if level < 2: | |
| 168 try: | |
| 169 alpha_sort = match.group(1) | |
| 170 except IndexError: | |
| 171 level = 2 | |
| 172 else: | |
| 173 if level == 0: | |
| 174 try: | |
| 175 int_sort = int(alpha_sort) | |
| 176 except ValueError: | |
| 177 level = 1 | |
| 178 options.append([int_sort, alpha_sort, | |
| 179 image.location, image.id, image]) | |
| 180 if level: | |
| 181 log.msg('sorting images at level %d' % level) | |
| 182 options = [candidate[level:] for candidate in options] | |
| 183 else: | |
| 184 options = [(image.location, image.id, image) for image | |
| 185 in self.conn.get_all_images( | |
| 186 owners=self.valid_ami_owners)] | |
| 187 options.sort() | |
| 188 log.msg('sorted images (last is chosen): %s' % | |
| 189 (', '.join( | |
| 190 ['%s (%s)' % (candidate[-1].id, candidate[-1].location) | |
| 191 for candidate in options]))) | |
| 192 if not options: | |
| 193 raise ValueError('no available images match constraints') | |
| 194 return options[-1][-1] | |
| 195 | |
| 196 def dns(self): | |
| 197 if self.instance is None: | |
| 198 return None | |
| 199 return self.instance.public_dns_name | |
| 200 dns = property(dns) | |
| 201 | |
| 202 def start_instance(self): | |
| 203 if self.instance is not None: | |
| 204 raise ValueError('instance active') | |
| 205 return threads.deferToThread(self._start_instance) | |
| 206 | |
| 207 def _start_instance(self): | |
| 208 image = self.get_image() | |
| 209 reservation = image.run( | |
| 210 key_name=self.keypair_name, security_groups=[self.security_name], | |
| 211 instance_type=self.instance_type, user_data=self.user_data) | |
| 212 self.instance = reservation.instances[0] | |
| 213 log.msg('%s %s starting instance %s' % | |
| 214 (self.__class__.__name__, self.slavename, self.instance.id)) | |
| 215 duration = 0 | |
| 216 interval = self._poll_resolution | |
| 217 while self.instance.state == PENDING: | |
| 218 time.sleep(interval) | |
| 219 duration += interval | |
| 220 if duration % 60 == 0: | |
| 221 log.msg('%s %s has waited %d minutes for instance %s' % | |
| 222 (self.__class__.__name__, self.slavename, duration//60, | |
| 223 self.instance.id)) | |
| 224 self.instance.update() | |
| 225 if self.instance.state == RUNNING: | |
| 226 self.output = self.instance.get_console_output() | |
| 227 minutes = duration//60 | |
| 228 seconds = duration%60 | |
| 229 log.msg('%s %s instance %s started on %s ' | |
| 230 'in about %d minutes %d seconds (%s)' % | |
| 231 (self.__class__.__name__, self.slavename, | |
| 232 self.instance.id, self.dns, minutes, seconds, | |
| 233 self.output.output)) | |
| 234 if self.elastic_ip is not None: | |
| 235 self.instance.use_ip(self.elastic_ip) | |
| 236 return [self.instance.id, | |
| 237 image.id, | |
| 238 '%02d:%02d:%02d' % (minutes//60, minutes%60, seconds)] | |
| 239 else: | |
| 240 log.msg('%s %s failed to start instance %s (%s)' % | |
| 241 (self.__class__.__name__, self.slavename, | |
| 242 self.instance.id, self.instance.state)) | |
| 243 raise interfaces.LatentBuildSlaveFailedToSubstantiate( | |
| 244 self.instance.id, self.instance.state) | |
| 245 | |
| 246 def stop_instance(self, fast=False): | |
| 247 if self.instance is None: | |
| 248 # be gentle. Something may just be trying to alert us that an | |
| 249 # instance never attached, and it's because, somehow, we never | |
| 250 # started. | |
| 251 return defer.succeed(None) | |
| 252 instance = self.instance | |
| 253 self.output = self.instance = None | |
| 254 return threads.deferToThread( | |
| 255 self._stop_instance, instance, fast) | |
| 256 | |
| 257 def _stop_instance(self, instance, fast): | |
| 258 if self.elastic_ip is not None: | |
| 259 self.conn.disassociate_address(self.elastic_ip.public_ip) | |
| 260 instance.update() | |
| 261 if instance.state not in (SHUTTINGDOWN, TERMINATED): | |
| 262 instance.stop() | |
| 263 log.msg('%s %s terminating instance %s' % | |
| 264 (self.__class__.__name__, self.slavename, instance.id)) | |
| 265 duration = 0 | |
| 266 interval = self._poll_resolution | |
| 267 if fast: | |
| 268 goal = (SHUTTINGDOWN, TERMINATED) | |
| 269 instance.update() | |
| 270 else: | |
| 271 goal = (TERMINATED,) | |
| 272 while instance.state not in goal: | |
| 273 time.sleep(interval) | |
| 274 duration += interval | |
| 275 if duration % 60 == 0: | |
| 276 log.msg( | |
| 277 '%s %s has waited %d minutes for instance %s to end' % | |
| 278 (self.__class__.__name__, self.slavename, duration//60, | |
| 279 instance.id)) | |
| 280 instance.update() | |
| 281 log.msg('%s %s instance %s %s ' | |
| 282 'after about %d minutes %d seconds' % | |
| 283 (self.__class__.__name__, self.slavename, | |
| 284 instance.id, goal, duration//60, duration%60)) | |
| OLD | NEW |