Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(250)

Side by Side Diff: utils/net.py

Issue 23431002: [Abandoned] Move url_open with dependencies to utils.net module. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/swarm_client
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tests/url_open_timeout_test.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Classes and functions for generic network communication over HTTP."""
6
7 import cookielib
8 import cStringIO as StringIO
9 import httplib
10 import itertools
11 import logging
12 import math
13 import os
14 import random
15 import socket
16 import ssl
17 import threading
18 import time
19 import urllib
20 import urllib2
21 import urlparse
22
23 from third_party.rietveld import upload
24
25 # Hack out upload logging.info()
26 upload.logging = logging.getLogger('upload')
27 # Mac pylint choke on this line.
28 upload.logging.setLevel(logging.WARNING) # pylint: disable=E1103
29
30
31 # The name of the key to store the count of url attempts.
32 COUNT_KEY = 'UrlOpenAttempt'
33
34 # Default maximum number of attempts to trying opening a url before aborting.
35 URL_OPEN_MAX_ATTEMPTS = 30
36
37 # Default timeout when retrying.
38 URL_OPEN_TIMEOUT = 6*60.
39
40
41 # Global (for now) map: server URL (http://example.com) -> HttpService instance.
42 # Used by get_http_service to cache HttpService instances.
43 _http_services = {}
44 _http_services_lock = threading.Lock()
45
46
47 class TimeoutError(IOError):
48 """Timeout while reading HTTP response."""
49
50 def __init__(self, inner_exc=None):
51 super(TimeoutError, self).__init__(str(inner_exc or 'Timeout'))
52 self.inner_exc = inner_exc
53
54
55 def url_open(url, **kwargs):
56 """Attempts to open the given url multiple times.
57
58 |data| can be either:
59 -None for a GET request
60 -str for pre-encoded data
61 -list for data to be encoded
62 -dict for data to be encoded (COUNT_KEY will be added in this case)
63
64 Returns HttpResponse object, where the response may be read from, or None
65 if it was unable to connect.
66 """
67 urlhost, urlpath = split_server_request_url(url)
68 service = get_http_service(urlhost)
69 return service.request(urlpath, **kwargs)
70
71
72 def url_read(url, **kwargs):
73 """Attempts to open the given url multiple times and read all data from it.
74
75 Accepts same arguments as url_open function.
76
77 Returns all data read or None if it was unable to connect or read the data.
78 """
79 response = url_open(url, **kwargs)
80 if not response:
81 return None
82 try:
83 return response.read()
84 except TimeoutError:
85 return None
86
87
88 def split_server_request_url(url):
89 """Splits the url into scheme+netloc and path+params+query+fragment."""
90 url_parts = list(urlparse.urlparse(url))
91 urlhost = '%s://%s' % (url_parts[0], url_parts[1])
92 urlpath = urlparse.urlunparse(['', ''] + url_parts[2:])
93 return urlhost, urlpath
94
95
96 def get_http_service(urlhost):
97 """Returns existing or creates new instance of HttpService that can send
98 requests to given base urlhost.
99 """
100 # Ensure consistency.
101 urlhost = str(urlhost).lower().rstrip('/')
102 with _http_services_lock:
103 service = _http_services.get(urlhost)
104 if not service:
105 service = AppEngineService(urlhost)
106 _http_services[urlhost] = service
107 return service
108
109
110 class HttpService(object):
111 """Base class for a class that provides an API to HTTP based service:
112 - Provides 'request' method.
113 - Supports automatic request retries.
114 - Supports persistent cookies.
115 - Thread safe.
116 """
117
118 # File to use to store all auth cookies.
119 COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')
120
121 # CookieJar reused by all services + lock that protects its instantiation.
122 _cookie_jar = None
123 _cookie_jar_lock = threading.Lock()
124
125 def __init__(self, urlhost):
126 self.urlhost = urlhost
127 self.cookie_jar = self.load_cookie_jar()
128 self.opener = self.create_url_opener()
129
130 def authenticate(self): # pylint: disable=R0201
131 """Called when HTTP server asks client to authenticate.
132 Can be implemented in subclasses.
133 """
134 return False
135
136 @staticmethod
137 def load_cookie_jar():
138 """Returns global CoookieJar object that stores cookies in the file."""
139 with HttpService._cookie_jar_lock:
140 if HttpService._cookie_jar is not None:
141 return HttpService._cookie_jar
142 jar = ThreadSafeCookieJar(HttpService.COOKIE_FILE)
143 jar.load()
144 HttpService._cookie_jar = jar
145 return jar
146
147 @staticmethod
148 def save_cookie_jar():
149 """Called when cookie jar needs to be flushed to disk."""
150 with HttpService._cookie_jar_lock:
151 if HttpService._cookie_jar is not None:
152 HttpService._cookie_jar.save()
153
154 def create_url_opener(self): # pylint: disable=R0201
155 """Returns OpenerDirector that will be used when sending requests.
156 Can be reimplemented in subclasses."""
157 return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))
158
159 def request(self, urlpath, data=None, content_type=None, **kwargs):
160 """Attempts to open the given url multiple times.
161
162 |urlpath| is relative to the server root, i.e. '/some/request?param=1'.
163
164 |data| can be either:
165 -None for a GET request
166 -str for pre-encoded data
167 -list for data to be encoded
168 -dict for data to be encoded (COUNT_KEY will be added in this case)
169
170 Returns a file-like object, where the response may be read from, or None
171 if it was unable to connect.
172 """
173 assert urlpath and urlpath[0] == '/'
174
175 if isinstance(data, dict) and COUNT_KEY in data:
176 logging.error('%s already existed in the data passed into UlrOpen. It '
177 'would be overwritten. Aborting UrlOpen', COUNT_KEY)
178 return None
179
180 method = 'GET' if data is None else 'POST'
181 assert not ((method != 'POST') and content_type), (
182 'Can\'t use content_type on GET')
183
184 def make_request(extra):
185 """Returns a urllib2.Request instance for this specific retry."""
186 if isinstance(data, str) or data is None:
187 payload = data
188 else:
189 if isinstance(data, dict):
190 payload = data.items()
191 else:
192 payload = data[:]
193 payload.extend(extra.iteritems())
194 payload = urllib.urlencode(payload)
195 new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
196 if isinstance(data, str) or data is None:
197 # In these cases, add the extra parameter to the query part of the url.
198 url_parts = list(urlparse.urlparse(new_url))
199 # Append the query parameter.
200 if url_parts[4] and extra:
201 url_parts[4] += '&'
202 url_parts[4] += urllib.urlencode(extra)
203 new_url = urlparse.urlunparse(url_parts)
204 request = urllib2.Request(new_url, data=payload)
205 if payload is not None:
206 if content_type:
207 request.add_header('Content-Type', content_type)
208 request.add_header('Content-Length', len(payload))
209 return request
210
211 return self._retry_loop(make_request, **kwargs)
212
213 def _retry_loop(
214 self,
215 make_request,
216 max_attempts=URL_OPEN_MAX_ATTEMPTS,
217 retry_404=False,
218 retry_50x=True,
219 timeout=URL_OPEN_TIMEOUT,
220 read_timeout=None):
221 """Runs internal request-retry loop.
222
223 - Optionally retries HTTP 404 and 50x.
224 - Retries up to |max_attempts| times. If None or 0, there's no limit in the
225 number of retries.
226 - Retries up to |timeout| duration in seconds. If None or 0, there's no
227 limit in the time taken to do retries.
228 - If both |max_attempts| and |timeout| are None or 0, this functions retries
229 indefinitely.
230
231 If |read_timeout| is not None will configure underlying socket to
232 raise TimeoutError exception whenever there's no response from the server
233 for more than |read_timeout| seconds. It can happen during any read
234 operation so once you pass non-None |read_timeout| be prepared to handle
235 these exceptions in subsequent reads from the stream.
236 """
237 authenticated = False
238 last_error = None
239 attempt = 0
240 start = self._now()
241 for attempt in itertools.count():
242 if max_attempts and attempt >= max_attempts:
243 # Too many attempts.
244 break
245 if timeout and (self._now() - start) >= timeout:
246 # Retried for too long.
247 break
248 extra = {COUNT_KEY: attempt} if attempt else {}
249 request = make_request(extra)
250 try:
251 url_response = self._url_open(request, timeout=read_timeout)
252 logging.debug('url_open(%s) succeeded', request.get_full_url())
253 # Some tests mock url_open to return StringIO without 'headers'.
254 return HttpResponse(url_response, request.get_full_url(),
255 getattr(url_response, 'headers', {}))
256 except urllib2.HTTPError as e:
257 # Unauthorized. Ask to authenticate and then try again.
258 if e.code in (401, 403):
259 # Try to authenticate only once. If it doesn't help, then server does
260 # not support app engine authentication.
261 logging.error(
262 'Authentication is required for %s on attempt %d.\n%s',
263 request.get_full_url(), attempt,
264 self._format_exception(e, verbose=True))
265 if not authenticated and self.authenticate():
266 authenticated = True
267 # Do not sleep.
268 continue
269 # If authentication failed, return.
270 logging.error(
271 'Unable to authenticate to %s.\n%s',
272 request.get_full_url(), self._format_exception(e, verbose=True))
273 return None
274
275 if ((e.code < 500 and not (retry_404 and e.code == 404)) or
276 (e.code >= 500 and not retry_50x)):
277 # This HTTPError means we reached the server and there was a problem
278 # with the request, so don't retry.
279 logging.error(
280 'Able to connect to %s but an exception was thrown.\n%s',
281 request.get_full_url(), self._format_exception(e, verbose=True))
282 return None
283
284 # The HTTPError was due to a server error, so retry the attempt.
285 logging.warning('Able to connect to %s on attempt %d.\n%s',
286 request.get_full_url(), attempt,
287 self._format_exception(e))
288 last_error = e
289
290 except (urllib2.URLError, httplib.HTTPException,
291 socket.timeout, ssl.SSLError) as e:
292 logging.warning('Unable to open url %s on attempt %d.\n%s',
293 request.get_full_url(), attempt,
294 self._format_exception(e))
295 last_error = e
296
297 # Only sleep if we are going to try again.
298 if max_attempts and attempt != max_attempts:
299 remaining = None
300 if timeout:
301 remaining = timeout - (self._now() - start)
302 if remaining <= 0:
303 break
304 self.sleep_before_retry(attempt, remaining)
305
306 logging.error('Unable to open given url, %s, after %d attempts.\n%s',
307 request.get_full_url(), max_attempts,
308 self._format_exception(last_error, verbose=True))
309 return None
310
311 def _url_open(self, request, timeout=None):
312 """Low level method to execute urllib2.Request's.
313
314 To be mocked in tests.
315 """
316 if timeout is not None:
317 return self.opener.open(request, timeout=timeout)
318 else:
319 # Leave original default value for |timeout|. It's nontrivial.
320 return self.opener.open(request)
321
322 @staticmethod
323 def _now():
324 """To be mocked in tests."""
325 return time.time()
326
327 @staticmethod
328 def calculate_sleep_before_retry(attempt, max_duration):
329 # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
330 # survive.
331 MAX_SLEEP = 10.
332 # random.random() returns [0.0, 1.0). Starts with relatively short waiting
333 # time by starting with 1.5/2+1.5^-1 median offset.
334 duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))
335 assert duration > 0.1
336 duration = min(MAX_SLEEP, duration)
337 if max_duration:
338 duration = min(max_duration, duration)
339 return duration
340
341 @classmethod
342 def sleep_before_retry(cls, attempt, max_duration):
343 """Sleeps for some amount of time when retrying the request.
344
345 To be mocked in tests.
346 """
347 time.sleep(cls.calculate_sleep_before_retry(attempt, max_duration))
348
349 @staticmethod
350 def _format_exception(exc, verbose=False):
351 """Given an instance of some exception raised by urlopen returns human
352 readable piece of text with detailed information about the error.
353 """
354 out = ['Exception: %s' % (exc,)]
355 if verbose:
356 if isinstance(exc, urllib2.HTTPError):
357 out.append('-' * 10)
358 if exc.hdrs:
359 for header, value in exc.hdrs.items():
360 if not header.startswith('x-'):
361 out.append('%s: %s' % (header.capitalize(), value))
362 out.append('')
363 out.append(exc.read() or '<empty body>')
364 out.append('-' * 10)
365 return '\n'.join(out)
366
367
368 class HttpResponse(object):
369 """Response from HttpService."""
370
371 def __init__(self, stream, url, headers):
372 self._stream = stream
373 self._url = url
374 self._headers = headers
375 self._read = 0
376
377 @property
378 def content_length(self):
379 """Total length to the response or None if not known in advance."""
380 length = self._headers.get('Content-Length')
381 return int(length) if length is not None else None
382
383 def read(self, size=None):
384 """Reads up to |size| bytes from the stream and returns them.
385
386 If |size| is None reads all available bytes.
387
388 Raises TimeoutError on read timeout.
389 """
390 try:
391 # cStringIO has a bug: stream.read(None) is not the same as stream.read().
392 data = self._stream.read() if size is None else self._stream.read(size)
393 self._read += len(data)
394 return data
395 except (socket.timeout, ssl.SSLError) as e:
396 logging.error('Timeout while reading from %s, read %d of %s: %s',
397 self._url, self._read, self.content_length, e)
398 raise TimeoutError(e)
399
400 @classmethod
401 def get_fake_response(cls, content, url):
402 """Returns HttpResponse with predefined content, useful in tests."""
403 return cls(StringIO.StringIO(content),
404 url, {'content-length': len(content)})
405
406
407
408 class AppEngineService(HttpService):
409 """This class implements authentication support for
410 an app engine based services.
411 """
412
413 # This lock ensures that user won't be confused with multiple concurrent
414 # login prompts.
415 _auth_lock = threading.Lock()
416
417 def __init__(self, urlhost, email=None, password=None):
418 super(AppEngineService, self).__init__(urlhost)
419 self.email = email
420 self.password = password
421 self._keyring = None
422
423 def authenticate(self):
424 """Authenticates in the app engine application.
425 Returns True on success.
426 """
427 if not upload:
428 logging.error('\'upload\' module is missing, '
429 'app engine authentication is disabled.')
430 return False
431 cookie_jar = self.cookie_jar
432 save_cookie_jar = self.save_cookie_jar
433 # RPC server that uses AuthenticationSupport's cookie jar.
434 class AuthServer(upload.AbstractRpcServer):
435 def _GetOpener(self):
436 # Authentication code needs to know about 302 response.
437 # So make OpenerDirector without HTTPRedirectHandler.
438 opener = urllib2.OpenerDirector()
439 opener.add_handler(urllib2.ProxyHandler())
440 opener.add_handler(urllib2.UnknownHandler())
441 opener.add_handler(urllib2.HTTPHandler())
442 opener.add_handler(urllib2.HTTPDefaultErrorHandler())
443 opener.add_handler(urllib2.HTTPSHandler())
444 opener.add_handler(urllib2.HTTPErrorProcessor())
445 opener.add_handler(urllib2.HTTPCookieProcessor(cookie_jar))
446 return opener
447 def PerformAuthentication(self):
448 self._Authenticate()
449 save_cookie_jar()
450 return self.authenticated
451 with AppEngineService._auth_lock:
452 rpc_server = AuthServer(self.urlhost, self.get_credentials)
453 return rpc_server.PerformAuthentication()
454
455 def get_credentials(self):
456 """Called during authentication process to get the credentials.
457 May be called mutliple times if authentication fails.
458 Returns tuple (email, password).
459 """
460 # 'authenticate' calls this only if 'upload' is present.
461 # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
462 assert upload, '\'upload\' module is required for this to work'
463 if self.email and self.password:
464 return (self.email, self.password)
465 if not self._keyring:
466 self._keyring = upload.KeyringCreds(self.urlhost,
467 self.urlhost,
468 self.email)
469 return self._keyring.GetUserCredentials()
470
471
472 class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
473 """MozillaCookieJar with thread safe load and save."""
474
475 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
476 """Loads cookies from the file if it exists."""
477 filename = os.path.expanduser(filename or self.filename)
478 with self._cookies_lock:
479 if os.path.exists(filename):
480 try:
481 cookielib.MozillaCookieJar.load(self, filename,
482 ignore_discard,
483 ignore_expires)
484 logging.debug('Loaded cookies from %s', filename)
485 except (cookielib.LoadError, IOError):
486 pass
487 else:
488 try:
489 fd = os.open(filename, os.O_CREAT, 0600)
490 os.close(fd)
491 except OSError:
492 logging.error('Failed to create %s', filename)
493 try:
494 os.chmod(filename, 0600)
495 except OSError:
496 logging.error('Failed to fix mode for %s', filename)
497
498 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
499 """Saves cookies to the file, completely overwriting it."""
500 logging.debug('Saving cookies to %s', filename or self.filename)
501 with self._cookies_lock:
502 try:
503 cookielib.MozillaCookieJar.save(self, filename,
504 ignore_discard,
505 ignore_expires)
506 except OSError:
507 logging.error('Failed to save %s', filename)
OLDNEW
« no previous file with comments | « tests/url_open_timeout_test.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698