Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(268)

Side by Side Diff: appengine/swarming/swarming_bot/bot_code/remote_client.py

Issue 2593863002: Fix evil retry loop on poll errors. (Closed)
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 import base64 5 import base64
6 import logging 6 import logging
7 import threading 7 import threading
8 import time 8 import time
9 import traceback 9 import traceback
10 import urllib 10 import urllib
11 11
12 from utils import net 12 from utils import net
13 13
14 from remote_client_errors import BotCodeError
14 from remote_client_errors import InitializationError 15 from remote_client_errors import InitializationError
15 from remote_client_errors import BotCodeError
16 from remote_client_errors import InternalError 16 from remote_client_errors import InternalError
17 from remote_client_errors import PollError
17 18
18 19
19 # RemoteClient will attempt to refresh the authentication headers once they are 20 # RemoteClient will attempt to refresh the authentication headers once they are
20 # this close to the expiration. 21 # this close to the expiration.
21 AUTH_HEADERS_EXPIRATION_SEC = 3*60 22 AUTH_HEADERS_EXPIRATION_SEC = 3*60
22 23
23 24
24 # How long to wait for a response from the server. Must not be greater than 25 # How long to wait for a response from the server. Must not be greater than
25 # AUTH_HEADERS_EXPIRATION_SEC, since otherwise there's a chance auth headers 26 # AUTH_HEADERS_EXPIRATION_SEC, since otherwise there's a chance auth headers
26 # will expire while we wait for connection. 27 # will expire while we wait for connection.
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after
207 def do_handshake(self, attributes): 208 def do_handshake(self, attributes):
208 """Performs the initial handshake. Returns a dict (contents TBD)""" 209 """Performs the initial handshake. Returns a dict (contents TBD)"""
209 return self._url_read_json( 210 return self._url_read_json(
210 '/swarming/api/v1/bot/handshake', 211 '/swarming/api/v1/bot/handshake',
211 data=attributes) 212 data=attributes)
212 213
213 def poll(self, attributes): 214 def poll(self, attributes):
214 """Polls for new work or other commands; returns a (cmd, value) pair as 215 """Polls for new work or other commands; returns a (cmd, value) pair as
215 shown below. 216 shown below.
216 217
217 Note that if the returned dict does not have the correct 218 Raises:
218 values set, this method will raise an exception. 219 PollError if can't contact the server after many attempts, the server
220 replies with an error or the returned dict does not have the correct
221 values set.
219 """ 222 """
220 resp = self._url_read_json('/swarming/api/v1/bot/poll', data=attributes) 223 resp = self._url_read_json('/swarming/api/v1/bot/poll', data=attributes)
221 if not resp: 224 if not resp or resp.get('error'):
222 return (None, None) 225 raise PollError(
226 resp.get('error') if resp else 'Failed to contact server')
223 227
224 cmd = resp['cmd'] 228 cmd = resp['cmd']
225 if cmd == 'sleep': 229 if cmd == 'sleep':
226 return (cmd, resp['duration']) 230 return (cmd, resp['duration'])
227 if cmd == 'terminate': 231 if cmd == 'terminate':
228 return (cmd, resp['task_id']) 232 return (cmd, resp['task_id'])
229 if cmd == 'run': 233 if cmd == 'run':
230 return (cmd, resp['manifest']) 234 return (cmd, resp['manifest'])
231 if cmd == 'update': 235 if cmd == 'update':
232 return (cmd, resp['version']) 236 return (cmd, resp['version'])
233 if cmd == 'restart': 237 if cmd == 'restart':
234 return (cmd, resp['message']) 238 return (cmd, resp['message'])
235 raise ValueError('Unexpected command: %s\n%s' % (cmd, resp)) 239 raise PollError('Unexpected command: %s\n%s' % (cmd, resp))
236 240
237 def get_bot_code(self, new_zip_path, bot_version, bot_id): 241 def get_bot_code(self, new_zip_path, bot_version, bot_id):
238 """Downloads code into the file specified by new_zip_path (a string). 242 """Downloads code into the file specified by new_zip_path (a string).
239 243
240 Throws BotCodeError on error. 244 Throws BotCodeError on error.
241 """ 245 """
242 url_path = '/swarming/api/v1/bot/bot_code/%s?bot_id=%s' % ( 246 url_path = '/swarming/api/v1/bot/bot_code/%s?bot_id=%s' % (
243 bot_version, urllib.quote_plus(bot_id)) 247 bot_version, urllib.quote_plus(bot_id))
244 if not self._url_retrieve(new_zip_path, url_path): 248 if not self._url_retrieve(new_zip_path, url_path):
245 raise BotCodeError(new_zip_path, self._server + url_path, bot_version) 249 raise BotCodeError(new_zip_path, self._server + url_path, bot_version)
246 250
247 def ping(self): 251 def ping(self):
248 """Unlike all other methods, this one isn't authenticated.""" 252 """Unlike all other methods, this one isn't authenticated."""
249 resp = net.url_read(self._server + '/swarming/api/v1/bot/server_ping') 253 resp = net.url_read(self._server + '/swarming/api/v1/bot/server_ping')
250 if resp is None: 254 if resp is None:
251 logging.error('No response from server_ping') 255 logging.error('No response from server_ping')
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698