| OLD | NEW |
| (Empty) |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """Classes and functions for generic network communication over HTTP.""" | |
| 6 | |
| 7 import cookielib | |
| 8 import cStringIO as StringIO | |
| 9 import httplib | |
| 10 import itertools | |
| 11 import logging | |
| 12 import math | |
| 13 import os | |
| 14 import random | |
| 15 import re | |
| 16 import socket | |
| 17 import ssl | |
| 18 import threading | |
| 19 import time | |
| 20 import urllib | |
| 21 import urllib2 | |
| 22 import urlparse | |
| 23 | |
| 24 from third_party import requests | |
| 25 from third_party.requests import adapters | |
| 26 from third_party.rietveld import upload | |
| 27 | |
| 28 from utils import zip_package | |
| 29 | |
# Hack out upload logging.info(): route the third_party upload module through
# a dedicated 'upload' logger and raise its level so its chatty INFO output
# is suppressed.
upload.logging = logging.getLogger('upload')
# Mac pylint chokes on this line.
upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
| 34 | |
| 35 | |
# TODO(vadimsh): Remove this once we don't have to support python 2.6 anymore.
def monkey_patch_httplib():
  """Patch httplib.HTTPConnection to have '_tunnel_host' attribute.

  'requests' library (>= v2) accesses 'HTTPConnection._tunnel_host' attribute
  added only in python 2.6.3. This function patches HTTPConnection to have it
  on python 2.6.2 as well.
  """
  # Probe an instance; if the attribute is missing, provide a class-level
  # default so attribute lookups on any connection succeed.
  probe = httplib.HTTPConnection('example.com')
  if not hasattr(probe, '_tunnel_host'):
    httplib.HTTPConnection._tunnel_host = None
monkey_patch_httplib()
| 48 | |
| 49 | |
# Big switch that controls what API to use to make HTTP requests.
# It's temporary here to simplify benchmarking of old vs new implementation.
USE_REQUESTS_LIB = True

# The name of the query parameter key used to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts to try opening a url before aborting.
URL_OPEN_MAX_ATTEMPTS = 30

# Default total timeout (in seconds) of the retry loop when retrying.
URL_OPEN_TIMEOUT = 6*60.

# Content type for url encoded POST body.
URL_ENCODED_FORM_CONTENT_TYPE = 'application/x-www-form-urlencoded'

# Default content type for POST body.
DEFAULT_CONTENT_TYPE = URL_ENCODED_FORM_CONTENT_TYPE

# Content type -> function that encodes a request body.
CONTENT_ENCODERS = {
  URL_ENCODED_FORM_CONTENT_TYPE: urllib.urlencode,
}

# File to use to store all auth cookies.
COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')

# Google Storage URL regular expression.
GS_STORAGE_HOST_URL_RE = re.compile(r'https://.*\.storage\.googleapis\.com')


# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
_http_services_lock = threading.Lock()

# CookieJar reused by all services + lock that protects its instantiation.
_cookie_jar = None
_cookie_jar_lock = threading.Lock()

# Path to cacert.pem bundle file reused by all services.
_ca_certs = None
_ca_certs_lock = threading.Lock()
| 93 | |
| 94 | |
class NetError(IOError):
  """Generic network related error."""

  def __init__(self, inner_exc=None):
    # The docstring doubles as the default error message, so subclasses
    # inherit a sensible str() representation.
    super(NetError, self).__init__(str(inner_exc or self.__doc__))
    self.inner_exc = inner_exc

  def format(self, verbose=False):
    """Human readable description with detailed information about the error."""
    lines = ['Exception: %s' % (self.inner_exc,)]
    if verbose:
      headers = None
      body = None
      inner = self.inner_exc
      # Pull response details out of whichever library raised the error.
      if isinstance(inner, urllib2.HTTPError):
        headers = inner.hdrs.items()
        body = inner.read()
      elif isinstance(inner, requests.HTTPError):
        headers = inner.response.headers.items()
        body = inner.response.content
      if headers or body:
        lines.append('----------')
        if headers:
          # Skip 'x-*' headers (implementation detail noise).
          lines.extend(
              '%s: %s' % (name.capitalize(), value)
              for name, value in headers
              if not name.startswith('x-'))
          lines.append('')
        lines.append(body or '<empty body>')
        lines.append('----------')
    return '\n'.join(lines)
| 124 | |
| 125 | |
class TimeoutError(NetError):
  """Timeout while reading HTTP response."""
  # NOTE: the docstring doubles as the default error message (see
  # NetError.__init__), so it is part of runtime behavior.


class ConnectionError(NetError):
  """Failed to connect to the server."""
  # NOTE: the docstring doubles as the default error message as well.
| 133 | |
class HttpError(NetError):
  """Server returned HTTP error code."""

  def __init__(self, code, inner_exc=None):
    # |code| is the HTTP status code (e.g. 404, 500) returned by the server;
    # |inner_exc| is the underlying library exception, if any.
    super(HttpError, self).__init__(inner_exc)
    self.code = code
| 141 | |
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  |data| can be either:
    - None for a GET request
    - str for pre-encoded data
    - list for data to be encoded
    - dict for data to be encoded

  See HttpService.request for a full list of arguments.

  Returns HttpResponse object, where the response may be read from, or None
  if it was unable to connect.
  """
  # Route the request through the (cached) HttpService for this host.
  urlhost, urlpath = split_server_request_url(url)
  return get_http_service(urlhost).request(urlpath, **kwargs)
| 159 | |
| 160 | |
def url_read(url, **kwargs):
  """Attempts to open the given url multiple times and read all data from it.

  Accepts same arguments as url_open function.

  Returns all data read or None if it was unable to connect or read the data.
  """
  # Force non-streaming mode so read() below consumes the whole body at once.
  kwargs['stream'] = False
  response = url_open(url, **kwargs)
  if response:
    try:
      return response.read()
    except TimeoutError:
      pass
  return None
| 176 | |
| 177 | |
def split_server_request_url(url):
  """Splits the url into scheme+netloc and path+params+query+fragment."""
  parts = urlparse.urlparse(url)
  urlhost = '%s://%s' % (parts.scheme, parts.netloc)
  # Reassemble everything after the netloc back into a relative URL.
  urlpath = urlparse.urlunparse(('', '') + tuple(parts)[2:])
  return urlhost, urlpath
| 184 | |
| 185 | |
def get_http_service(urlhost):
  """Returns existing or creates new instance of HttpService that can send
  requests to given base urlhost.
  """
  # Normalize the cache key so 'http://X' and 'http://x/' share one service.
  urlhost = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    service = _http_services.get(urlhost)
    if service:
      return service
    if GS_STORAGE_HOST_URL_RE.match(urlhost):
      # For Google Storage URL create a dumber HttpService that doesn't modify
      # requests with COUNT_KEY (since it breaks a signature) and doesn't try
      # to 'login' into Google Storage (since it's impossible).
      service = HttpService(
          urlhost,
          engine=create_request_engine(None),
          authenticator=None,
          use_count_key=False)
    else:
      # For other URLs (presumably App Engine), create a fancier HttpService
      # with cookies, authentication and COUNT_KEY query parameter in retries.
      cookie_jar = get_cookie_jar()
      service = HttpService(
          urlhost,
          engine=create_request_engine(cookie_jar),
          authenticator=AppEngineAuthenticator(urlhost, cookie_jar),
          use_count_key=True)
    _http_services[urlhost] = service
    return service
| 215 | |
| 216 | |
def create_request_engine(cookie_jar):
  """Returns a new instance of RequestEngine subclass.

  |cookie_jar| is an instance of ThreadSafeCookieJar class that holds all
  cookies. It is optional and may be None (in that case cookies are not saved
  on disk).
  """
  if not USE_REQUESTS_LIB:
    return Urllib2Engine(cookie_jar)
  return RequestsLibEngine(cookie_jar, get_cacerts_bundle())
| 227 | |
| 228 | |
def get_cookie_jar():
  """Returns the global CookieJar object that stores cookies in the file."""
  global _cookie_jar
  with _cookie_jar_lock:
    # Lazily create and load the jar on first use; reuse it afterwards.
    if _cookie_jar is None:
      jar = ThreadSafeCookieJar(COOKIE_FILE)
      jar.load()
      _cookie_jar = jar
    return _cookie_jar
| 239 | |
| 240 | |
def get_cacerts_bundle():
  """Returns path to a file with CA root certificates bundle."""
  global _ca_certs
  with _ca_certs_lock:
    # Re-extract the bundle if the previously extracted copy was deleted.
    if _ca_certs is None or not os.path.exists(_ca_certs):
      _ca_certs = zip_package.extract_resource(requests, 'cacert.pem')
    return _ca_certs
| 249 | |
| 250 | |
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """
  def __init__(self, urlhost, engine, authenticator=None, use_count_key=True):
    # |urlhost| - 'scheme://netloc' prefix prepended to all request paths.
    # |engine| - RequestEngine subclass that actually performs requests.
    # |authenticator| - optional Authenticator invoked on HTTP 401/403.
    # |use_count_key| - if True, append COUNT_KEY query param on retries.
    self.urlhost = urlhost
    self.engine = engine
    self.authenticator = authenticator
    self.use_count_key = use_count_key

  @staticmethod
  def is_transient_http_error(code, retry_404, retry_50x):
    """Returns True if given HTTP response code is a transient error."""
    # Google Storage can return this and it should be retried.
    if code == 408:
      return True
    # Retry 404 only if allowed by the caller.
    if code == 404:
      return retry_404
    # All other 4** errors are fatal.
    if code < 500:
      return False
    # Retry >= 500 error only if allowed by the caller.
    return retry_50x

  @staticmethod
  def encode_request_body(body, content_type):
    """Returns request body encoded according to its content type."""
    # No body or it is already encoded.
    if body is None or isinstance(body, str):
      return body
    # Any body should have content type set.
    assert content_type, 'Request has body, but no content type'
    encoder = CONTENT_ENCODERS.get(content_type)
    assert encoder, ('Unknown content type %s' % content_type)
    return encoder(body)

  def request(
      self,
      urlpath,
      data=None,
      content_type=None,
      max_attempts=URL_OPEN_MAX_ATTEMPTS,
      retry_404=False,
      retry_50x=True,
      timeout=URL_OPEN_TIMEOUT,
      read_timeout=None,
      stream=True,
      method=None):
    """Attempts to open the given url multiple times.

    |urlpath| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      - None for a GET request
      - str for pre-encoded data
      - list for data to be form-encoded
      - dict for data to be form-encoded

    - Optionally retries HTTP 404 and 50x.
    - Retries up to |max_attempts| times. If None or 0, there's no limit in the
      number of retries.
    - Retries up to |timeout| duration in seconds. If None or 0, there's no
      limit in the time taken to do retries.
    - If both |max_attempts| and |timeout| are None or 0, this functions retries
      indefinitely.

    If |method| is given it can be 'GET', 'POST' or 'PUT' and it will be used
    when performing the request. By default it's GET if |data| is None and POST
    if |data| is not None.

    If |read_timeout| is not None will configure underlying socket to
    raise TimeoutError exception whenever there's no response from the server
    for more than |read_timeout| seconds. It can happen during any read
    operation so once you pass non-None |read_timeout| be prepared to handle
    these exceptions in subsequent reads from the stream.

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect. If |stream| is False will read whole response
    into memory buffer before returning file-like object that reads from this
    memory buffer.
    """
    assert urlpath and urlpath[0] == '/', urlpath

    # Pick the HTTP method and encode the body (if any).
    if data is not None:
      assert method in (None, 'POST', 'PUT')
      method = method or 'POST'
      content_type = content_type or DEFAULT_CONTENT_TYPE
      body = self.encode_request_body(data, content_type)
    else:
      assert method in (None, 'GET')
      method = method or 'GET'
      body = None
      assert not content_type, 'Can\'t use content_type on GET'

    # Prepare request info: absolute URL plus GET params parsed out of urlpath.
    parsed = urlparse.urlparse('/' + urlpath.lstrip('/'))
    resource_url = urlparse.urljoin(self.urlhost, parsed.path)
    query_params = urlparse.parse_qsl(parsed.query)

    # Prepare headers.
    headers = {}
    if body is not None:
      headers['Content-Length'] = len(body)
      if content_type:
        headers['Content-Type'] = content_type

    last_error = None
    auth_attempted = False

    for attempt in retry_loop(max_attempts, timeout):
      # Log non-first attempt.
      if attempt.attempt:
        logging.warning(
            'Retrying request %s, attempt %d/%d...',
            resource_url, attempt.attempt, max_attempts)

      try:
        # Prepare and send a new request. A fresh HttpRequest is built each
        # attempt so prepare_request's COUNT_KEY params don't accumulate.
        request = HttpRequest(method, resource_url, query_params, body,
                              headers, read_timeout, stream)
        self.prepare_request(request, attempt.attempt)
        response = self.engine.perform_request(request)
        logging.debug('Request %s succeeded', request.get_full_url())
        return response

      except (ConnectionError, TimeoutError) as e:
        # Network-level failures are always retried.
        last_error = e
        logging.warning(
            'Unable to open url %s on attempt %d.\n%s',
            request.get_full_url(), attempt.attempt, e.format())
        continue

      except HttpError as e:
        last_error = e

        # Access denied -> authenticate.
        if e.code in (401, 403):
          logging.error(
              'Authentication is required for %s on attempt %d.\n%s',
              request.get_full_url(), attempt.attempt, e.format())
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          if not auth_attempted:
            auth_attempted = True
            if self.authenticator and self.authenticator.authenticate():
              # Success! Run request again immediately.
              attempt.skip_sleep = True
              # Also refresh cookies used by request engine.
              self.engine.reload_cookies()
              continue
          # Authentication attempt was unsuccessful.
          logging.error(
              'Unable to authenticate to %s.\n%s',
              request.get_full_url(), e.format(verbose=True))
          return None

        # Hit a error that can not be retried -> stop retry loop.
        if not self.is_transient_http_error(e.code, retry_404, retry_50x):
          # This HttpError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              request.get_full_url(), e.format(verbose=True))
          return None

        # Retry all other errors.
        logging.warning(
            'Server responded with error on %s on attempt %d.\n%s',
            request.get_full_url(), attempt.attempt, e.format())
        continue

    # NOTE(review): if retry_loop yields zero attempts (e.g. an already-expired
    # |timeout|), 'request' and 'last_error' are unbound here and this raises
    # NameError/AttributeError instead of returning None — verify callers never
    # pass such a timeout.
    logging.error(
        'Unable to open given url, %s, after %d attempts.\n%s',
        request.get_full_url(), max_attempts, last_error.format(verbose=True))
    return None

  def prepare_request(self, request, attempt):  # pylint: disable=R0201
    """Modify HttpRequest before sending it by adding COUNT_KEY parameter."""
    # Add COUNT_KEY only on retries.
    if self.use_count_key and attempt:
      request.params += [(COUNT_KEY, attempt)]
| 436 | |
| 437 | |
class HttpRequest(object):
  """A single request to be executed by an HttpService engine."""

  def __init__(self, method, url, params, body, headers, timeout, stream):
    """Arguments:
      |method| - HTTP method to use
      |url| - relative URL to the resource, without query parameters
      |params| - list of (key, value) pairs to put into GET parameters
      |body| - encoded body of the request (None or str)
      |headers| - dict with request headers
      |timeout| - socket read timeout (None to disable)
      |stream| - True to stream response from socket
    """
    self.method = method
    self.url = url
    # Copy mutable arguments so later mutation by the caller doesn't leak
    # into this request (and vice versa).
    self.params = list(params)
    self.headers = dict(headers)
    self.body = body
    self.timeout = timeout
    self.stream = stream

  def get_full_url(self):
    """Resource URL with url-encoded GET parameters."""
    if self.params:
      return '%s?%s' % (self.url, urllib.urlencode(self.params))
    return self.url

  def make_fake_response(self, content=''):
    """Makes new fake HttpResponse to this request, useful in tests."""
    return HttpResponse.get_fake_response(content, self.get_full_url())
| 469 | |
| 470 | |
class HttpResponse(object):
  """Response from HttpService.

  Wraps a file-like |stream| plus the response |headers| and the request |url|
  (the latter only for error reporting).
  """

  def __init__(self, stream, url, headers):
    self._stream = stream
    self._url = url
    self._headers = headers
    # Number of bytes read so far, used only in the timeout error message.
    self._read = 0

  @property
  def content_length(self):
    """Total length of the response or None if not known in advance."""
    length = self._headers.get('Content-Length')
    return int(length) if length is not None else None

  def read(self, size=None):
    """Reads up to |size| bytes from the stream and returns them.

    If |size| is None reads all available bytes.

    Raises TimeoutError on read timeout.
    """
    try:
      # cStringIO has a bug: stream.read(None) is not the same as stream.read().
      data = self._stream.read() if size is None else self._stream.read(size)
      self._read += len(data)
      return data
    except (socket.timeout, ssl.SSLError, requests.Timeout) as e:
      logging.error('Timeout while reading from %s, read %d of %s: %s',
          self._url, self._read, self.content_length, e)
      raise TimeoutError(e)

  @classmethod
  def get_fake_response(cls, content, url):
    """Returns HttpResponse with predefined content, useful in tests."""
    # Use 'Content-Length' with the exact capitalization that |content_length|
    # looks up: the previous lowercase 'content-length' key made
    # content_length return None for fake responses, since plain dict lookups
    # are case sensitive (unlike real requests-library header dicts).
    return cls(
        StringIO.StringIO(content), url, {'Content-Length': len(content)})
| 508 | |
| 509 | |
class RequestEngine(object):
  """Base class for objects that know how to execute HttpRequests."""

  def perform_request(self, request):
    """Sends a HttpRequest to the server and reads back the response.

    Returns HttpResponse.

    Raises:
      ConnectionError - failed to establish connection to the server.
      TimeoutError - timeout while connecting or reading response.
      HttpError - server responded with >= 400 error code.
    """
    raise NotImplementedError()

  def reload_cookies(self):
    """Reloads cookies from original cookie jar.

    Optional: subclasses without cookie support keep this no-op default.
    """
| 529 | |
| 530 | |
class Authenticator(object):
  """Base class for objects that know how to authenticate into http services."""

  def authenticate(self):
    """Authenticates in the app engine service.

    Subclasses must implement this.
    """
    raise NotImplementedError()
| 537 | |
| 538 | |
class Urllib2Engine(RequestEngine):
  """Executes HttpRequests via the urllib2 library."""

  def __init__(self, cookie_jar):
    super(Urllib2Engine, self).__init__()
    self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))

  def perform_request(self, request):
    try:
      req = self.make_urllib2_request(request)
      # Pass 'timeout' only when set, to keep urllib2's default otherwise.
      kwargs = {}
      if request.timeout:
        kwargs['timeout'] = request.timeout
      resp = self.opener.open(req, **kwargs)
      return HttpResponse(resp, req.get_full_url(), resp.headers)
    except urllib2.HTTPError as e:
      raise HttpError(e.code, e)
    except (urllib2.URLError, httplib.HTTPException,
            socket.timeout, ssl.SSLError) as e:
      raise ConnectionError(e)

  @staticmethod
  def make_urllib2_request(request):
    """Converts HttpRequest to urllib2.Request."""
    result = urllib2.Request(request.get_full_url(), data=request.body)
    for name, value in request.headers.iteritems():
      result.add_header(name, value)
    return result
| 567 | |
| 568 | |
class RequestsLibEngine(RequestEngine):
  """Executes HttpRequests via the 'requests' library."""

  # Preferred number of connections in a connection pool.
  CONNECTION_POOL_SIZE = 64
  # If True will not open more than CONNECTION_POOL_SIZE connections.
  CONNECTION_POOL_BLOCK = False
  # Maximum number of internal connection retries in a connection pool.
  CONNECTION_RETRIES = 0

  def __init__(self, cookie_jar, ca_certs):
    super(RequestsLibEngine, self).__init__()
    self.cookie_jar = cookie_jar
    self.session = requests.Session()
    # Don't pick up proxy/CA settings from environment variables.
    self.session.trust_env = False
    if cookie_jar:
      self.session.cookies = cookie_jar
    self.session.verify = ca_certs
    # Mount adapters with larger connection pools for both schemes.
    adapter_kwargs = dict(
        pool_connections=self.CONNECTION_POOL_SIZE,
        pool_maxsize=self.CONNECTION_POOL_SIZE,
        max_retries=self.CONNECTION_RETRIES,
        pool_block=self.CONNECTION_POOL_BLOCK)
    self.session.mount('https://', adapters.HTTPAdapter(**adapter_kwargs))
    self.session.mount('http://', adapters.HTTPAdapter(**adapter_kwargs))

  def perform_request(self, request):
    """Sends the request via the shared session, mapping errors to NetError."""
    try:
      response = self.session.request(
          method=request.method,
          url=request.url,
          params=request.params,
          data=request.body,
          headers=request.headers,
          timeout=request.timeout,
          stream=request.stream)
      response.raise_for_status()
      if request.stream:
        stream = response.raw
      else:
        stream = StringIO.StringIO(response.content)
      return HttpResponse(stream, request.get_full_url(), response.headers)
    except requests.Timeout as e:
      raise TimeoutError(e)
    except requests.HTTPError as e:
      raise HttpError(e.response.status_code, e)
    except (requests.ConnectionError, socket.timeout, ssl.SSLError) as e:
      raise ConnectionError(e)

  def reload_cookies(self):
    """Points the session back at the original cookie jar."""
    if self.cookie_jar:
      self.session.cookies = self.cookie_jar
| 622 | |
| 623 | |
class AppEngineAuthenticator(Authenticator):
  """Helper class to perform AppEngine authentication dance via upload.py."""

  # This lock ensures that user won't be confused with multiple concurrent
  # login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, urlhost, cookie_jar, email=None, password=None):
    # |urlhost| - base URL of the service to authenticate against.
    # |cookie_jar| - ThreadSafeCookieJar that receives the auth cookies.
    # |email|, |password| - optional explicit credentials; when absent,
    # credentials are fetched interactively via the keyring (get_credentials).
    super(AppEngineAuthenticator, self).__init__()
    self.urlhost = urlhost
    self.cookie_jar = cookie_jar
    self.email = email
    self.password = password
    self._keyring = None

  def authenticate(self):
    """Authenticates in the app engine application.

    Mutates |self.cookie_jar| in place by adding all required cookies.

    Returns True on success.
    """
    # To be used from inside AuthServer.
    cookie_jar = self.cookie_jar
    # RPC server that uses AuthenticationSupport's cookie jar.
    class AuthServer(upload.AbstractRpcServer):
      def _GetOpener(self):
        # Authentication code needs to know about 302 response.
        # So make OpenerDirector without HTTPRedirectHandler.
        opener = urllib2.OpenerDirector()
        opener.add_handler(urllib2.ProxyHandler())
        opener.add_handler(urllib2.UnknownHandler())
        opener.add_handler(urllib2.HTTPHandler())
        opener.add_handler(urllib2.HTTPDefaultErrorHandler())
        opener.add_handler(urllib2.HTTPSHandler())
        opener.add_handler(urllib2.HTTPErrorProcessor())
        opener.add_handler(urllib2.HTTPCookieProcessor(cookie_jar))
        return opener
      def PerformAuthentication(self):
        self._Authenticate()
        return self.authenticated
    # 'with cookie_jar' saves cookies to disk on exit (ThreadSafeCookieJar's
    # __exit__ calls save()); _auth_lock serializes login prompts.
    with cookie_jar:
      with self._auth_lock:
        rpc_server = AuthServer(self.urlhost, self.get_credentials)
        return rpc_server.PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.
    May be called multiple times if authentication fails.
    Returns tuple (email, password).
    """
    # Prefer explicitly supplied credentials when both are present.
    if self.email and self.password:
      return (self.email, self.password)
    # Otherwise lazily create a keyring-backed prompt and reuse it.
    self._keyring = self._keyring or upload.KeyringCreds(self.urlhost,
        self.urlhost, self.email)
    return self._keyring.GetUserCredentials()
| 680 | |
| 681 | |
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save."""

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, *_args):
    """Saves cookie jar when exiting the block."""
    self.save()
    return False

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists."""
    filename = os.path.expanduser(filename or self.filename)
    # _cookies_lock is inherited from the base cookielib.CookieJar class.
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(
              self, filename, ignore_discard, ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError):
          # Corrupted or unreadable cookie file: start with an empty jar.
          pass
      else:
        try:
          # Create the file with owner-only permissions, since it will hold
          # auth cookies.
          fd = os.open(filename, os.O_CREAT, 0600)
          os.close(fd)
        except OSError:
          logging.debug('Failed to create %s', filename)
      try:
        # Tighten permissions even if the file already existed.
        os.chmod(filename, 0600)
      except OSError:
        logging.debug('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it."""
    logging.debug('Saving cookies to %s', filename or self.filename)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(
            self, filename, ignore_discard, ignore_expires)
      except OSError:
        logging.error('Failed to save %s', filename)
| 725 | |
| 726 | |
class RetryAttempt(object):
  """Contains information about current retry attempt.

  Yielded from retry_loop.
  """

  def __init__(self, attempt, remaining):
    """Information about current attempt in retry loop:
      |attempt| - zero based index of attempt.
      |remaining| - how much time is left before retry loop finishes retries.
    """
    # Consumers may set skip_sleep to True to make retry_loop run the next
    # attempt immediately, without backoff.
    self.skip_sleep = False
    self.attempt = attempt
    self.remaining = remaining
| 741 | |
| 742 | |
def calculate_sleep_before_retry(attempt, max_duration):
  """How long to sleep before retrying an attempt in retry_loop."""
  # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
  # survive.
  MAX_SLEEP = 10.
  # Exponential backoff with jitter: random.random() returns [0.0, 1.0).
  # Starts with relatively short waiting time (1.5/2 + 1.5^-1 median offset).
  duration = random.random() * 1.5 + 1.5 ** (attempt - 1)
  assert duration > 0.1
  duration = min(MAX_SLEEP, duration)
  return min(max_duration, duration) if max_duration else duration
| 756 | |
| 757 | |
def sleep_before_retry(attempt, max_duration):
  """Sleeps for some amount of time when retrying the attempt in retry_loop.

  To be mocked in tests.
  """
  delay = calculate_sleep_before_retry(attempt, max_duration)
  time.sleep(delay)
| 764 | |
| 765 | |
def current_time():
  """Used by retry loop to get current time.

  To be mocked in tests.
  """
  return time.time()
| 772 | |
| 773 | |
def retry_loop(max_attempts=None, timeout=None):
  """Yields whenever new attempt to perform some action is needed.

  Yields instances of RetryAttempt class that contains information about current
  attempt. Setting |skip_sleep| attribute of RetryAttempt to True will cause
  retry loop to run next attempt immediately.

  |max_attempts| - limit on the number of attempts (None or 0 for unlimited).
  |timeout| - limit on total elapsed seconds (None or 0 for unlimited).
  """
  start = current_time()
  for attempt in itertools.count():
    # Too many attempts?
    if max_attempts and attempt == max_attempts:
      break
    # Retried for too long?
    remaining = (timeout - (current_time() - start)) if timeout else None
    if remaining is not None and remaining < 0:
      break
    # Kick next iteration.
    attempt_obj = RetryAttempt(attempt, remaining)
    yield attempt_obj
    if attempt_obj.skip_sleep:
      continue
    # Only sleep if we are going to try again. When |max_attempts| is not set
    # the loop is unbounded and there is always a next attempt, so sleep too
    # (previously this branch was skipped entirely for unbounded loops, making
    # them retry in a tight loop with no backoff at all).
    if not max_attempts or attempt != max_attempts - 1:
      remaining = (timeout - (current_time() - start)) if timeout else None
      if remaining is not None and remaining < 0:
        break
      sleep_before_retry(attempt, remaining)
| OLD | NEW |