utils/net.py - Issue 23431002: [Abandoned] Move url_open with dependencies to utils.net module.

Side by Side Diff: utils/net.py

Issue 23431002: [Abandoned] Move url_open with dependencies to utils.net module. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/swarm_client

Patch Set: Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright 2013 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """Classes and functions for generic network communication over HTTP."""

	6

	7 import cookielib

	8 import cStringIO as StringIO

	9 import httplib

	10 import itertools

	11 import logging

	12 import math

	13 import os

	14 import random

	15 import socket

	16 import ssl

	17 import threading

	18 import time

	19 import urllib

	20 import urllib2

	21 import urlparse

	22

	23 from third_party.rietveld import upload

	24

	25 # Hack out upload logging.info()

	26 upload.logging = logging.getLogger('upload')

	27 # Mac pylint choke on this line.

	28 upload.logging.setLevel(logging.WARNING) # pylint: disable=E1103

	29

	30

	31 # The name of the key to store the count of url attempts.

	32 COUNT_KEY = 'UrlOpenAttempt'

	33

	34 # Default maximum number of attempts to trying opening a url before aborting.

	35 URL_OPEN_MAX_ATTEMPTS = 30

	36

	37 # Default timeout when retrying.

	38 URL_OPEN_TIMEOUT = 6*60.

	39

	40

	41 # Global (for now) map: server URL (http://example.com) -> HttpService instance.

	42 # Used by get_http_service to cache HttpService instances.

	43 _http_services = {}

	44 _http_services_lock = threading.Lock()

	45

	46

	47 class TimeoutError(IOError):

	48 """Timeout while reading HTTP response."""

	49

	50 def __init__(self, inner_exc=None):

	51 super(TimeoutError, self).__init__(str(inner_exc or 'Timeout'))

	52 self.inner_exc = inner_exc

	53

	54

	55 def url_open(url, **kwargs):

	56 """Attempts to open the given url multiple times.

	57

	58 \|data\| can be either:

	59 -None for a GET request

	60 -str for pre-encoded data

	61 -list for data to be encoded

	62 -dict for data to be encoded (COUNT_KEY will be added in this case)

	63

	64 Returns HttpResponse object, where the response may be read from, or None

	65 if it was unable to connect.

	66 """

	67 urlhost, urlpath = split_server_request_url(url)

	68 service = get_http_service(urlhost)

	69 return service.request(urlpath, **kwargs)

	70

	71

	72 def url_read(url, **kwargs):

	73 """Attempts to open the given url multiple times and read all data from it.

	74

	75 Accepts same arguments as url_open function.

	76

	77 Returns all data read or None if it was unable to connect or read the data.

	78 """

	79 response = url_open(url, **kwargs)

	80 if not response:

	81 return None

	82 try:

	83 return response.read()

	84 except TimeoutError:

	85 return None

	86

	87

	88 def split_server_request_url(url):

	89 """Splits the url into scheme+netloc and path+params+query+fragment."""

	90 url_parts = list(urlparse.urlparse(url))

	91 urlhost = '%s://%s' % (url_parts[0], url_parts[1])

	92 urlpath = urlparse.urlunparse(['', ''] + url_parts[2:])

	93 return urlhost, urlpath

	94

	95

	96 def get_http_service(urlhost):

	97 """Returns existing or creates new instance of HttpService that can send

	98 requests to given base urlhost.

	99 """

	100 # Ensure consistency.

	101 urlhost = str(urlhost).lower().rstrip('/')

	102 with _http_services_lock:

	103 service = _http_services.get(urlhost)

	104 if not service:

	105 service = AppEngineService(urlhost)

	106 _http_services[urlhost] = service

	107 return service

	108

	109

	110 class HttpService(object):

	111 """Base class for a class that provides an API to HTTP based service:

	112 - Provides 'request' method.

	113 - Supports automatic request retries.

	114 - Supports persistent cookies.

	115 - Thread safe.

	116 """

	117

	118 # File to use to store all auth cookies.

	119 COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')

	120

	121 # CookieJar reused by all services + lock that protects its instantiation.

	122 _cookie_jar = None

	123 _cookie_jar_lock = threading.Lock()

	124

	125 def __init__(self, urlhost):

	126 self.urlhost = urlhost

	127 self.cookie_jar = self.load_cookie_jar()

	128 self.opener = self.create_url_opener()

	129

	130 def authenticate(self): # pylint: disable=R0201

	131 """Called when HTTP server asks client to authenticate.

	132 Can be implemented in subclasses.

	133 """

	134 return False

	135

	136 @staticmethod

	137 def load_cookie_jar():

	138 """Returns global CoookieJar object that stores cookies in the file."""

	139 with HttpService._cookie_jar_lock:

	140 if HttpService._cookie_jar is not None:

	141 return HttpService._cookie_jar

	142 jar = ThreadSafeCookieJar(HttpService.COOKIE_FILE)

	143 jar.load()

	144 HttpService._cookie_jar = jar

	145 return jar

	146

	147 @staticmethod

	148 def save_cookie_jar():

	149 """Called when cookie jar needs to be flushed to disk."""

	150 with HttpService._cookie_jar_lock:

	151 if HttpService._cookie_jar is not None:

	152 HttpService._cookie_jar.save()

	153

	154 def create_url_opener(self): # pylint: disable=R0201

	155 """Returns OpenerDirector that will be used when sending requests.

	156 Can be reimplemented in subclasses."""

	157 return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))

	158

	159 def request(self, urlpath, data=None, content_type=None, **kwargs):

	160 """Attempts to open the given url multiple times.

	161

	162 \|urlpath\| is relative to the server root, i.e. '/some/request?param=1'.

	163

	164 \|data\| can be either:

	165 -None for a GET request

	166 -str for pre-encoded data

	167 -list for data to be encoded

	168 -dict for data to be encoded (COUNT_KEY will be added in this case)

	169

	170 Returns a file-like object, where the response may be read from, or None

	171 if it was unable to connect.

	172 """

	173 assert urlpath and urlpath[0] == '/'

	174

	175 if isinstance(data, dict) and COUNT_KEY in data:

	176 logging.error('%s already existed in the data passed into UlrOpen. It '

	177 'would be overwritten. Aborting UrlOpen', COUNT_KEY)

	178 return None

	179

	180 method = 'GET' if data is None else 'POST'

	181 assert not ((method != 'POST') and content_type), (

	182 'Can\'t use content_type on GET')

	183

	184 def make_request(extra):

	185 """Returns a urllib2.Request instance for this specific retry."""

	186 if isinstance(data, str) or data is None:

	187 payload = data

	188 else:

	189 if isinstance(data, dict):

	190 payload = data.items()

	191 else:

	192 payload = data[:]

	193 payload.extend(extra.iteritems())

	194 payload = urllib.urlencode(payload)

	195 new_url = urlparse.urljoin(self.urlhost, urlpath[1:])

	196 if isinstance(data, str) or data is None:

	197 # In these cases, add the extra parameter to the query part of the url.

	198 url_parts = list(urlparse.urlparse(new_url))

	199 # Append the query parameter.

	200 if url_parts[4] and extra:

	201 url_parts[4] += '&'

	202 url_parts[4] += urllib.urlencode(extra)

	203 new_url = urlparse.urlunparse(url_parts)

	204 request = urllib2.Request(new_url, data=payload)

	205 if payload is not None:

	206 if content_type:

	207 request.add_header('Content-Type', content_type)

	208 request.add_header('Content-Length', len(payload))

	209 return request

	210

	211 return self._retry_loop(make_request, **kwargs)

	212

	213 def _retry_loop(

	214 self,

	215 make_request,

	216 max_attempts=URL_OPEN_MAX_ATTEMPTS,

	217 retry_404=False,

	218 retry_50x=True,

	219 timeout=URL_OPEN_TIMEOUT,

	220 read_timeout=None):

	221 """Runs internal request-retry loop.

	222

	223 - Optionally retries HTTP 404 and 50x.

	224 - Retries up to \|max_attempts\| times. If None or 0, there's no limit in the

	225 number of retries.

	226 - Retries up to \|timeout\| duration in seconds. If None or 0, there's no

	227 limit in the time taken to do retries.

	228 - If both \|max_attempts\| and \|timeout\| are None or 0, this functions retries

	229 indefinitely.

	230

	231 If \|read_timeout\| is not None will configure underlying socket to

	232 raise TimeoutError exception whenever there's no response from the server

	233 for more than \|read_timeout\| seconds. It can happen during any read

	234 operation so once you pass non-None \|read_timeout\| be prepared to handle

	235 these exceptions in subsequent reads from the stream.

	236 """

	237 authenticated = False

	238 last_error = None

	239 attempt = 0

	240 start = self._now()

	241 for attempt in itertools.count():

	242 if max_attempts and attempt >= max_attempts:

	243 # Too many attempts.

	244 break

	245 if timeout and (self._now() - start) >= timeout:

	246 # Retried for too long.

	247 break

	248 extra = {COUNT_KEY: attempt} if attempt else {}

	249 request = make_request(extra)

	250 try:

	251 url_response = self._url_open(request, timeout=read_timeout)

	252 logging.debug('url_open(%s) succeeded', request.get_full_url())

	253 # Some tests mock url_open to return StringIO without 'headers'.

	254 return HttpResponse(url_response, request.get_full_url(),

	255 getattr(url_response, 'headers', {}))

	256 except urllib2.HTTPError as e:

	257 # Unauthorized. Ask to authenticate and then try again.

	258 if e.code in (401, 403):

	259 # Try to authenticate only once. If it doesn't help, then server does

	260 # not support app engine authentication.

	261 logging.error(

	262 'Authentication is required for %s on attempt %d.\n%s',

	263 request.get_full_url(), attempt,

	264 self._format_exception(e, verbose=True))

	265 if not authenticated and self.authenticate():

	266 authenticated = True

	267 # Do not sleep.

	268 continue

	269 # If authentication failed, return.

	270 logging.error(

	271 'Unable to authenticate to %s.\n%s',

	272 request.get_full_url(), self._format_exception(e, verbose=True))

	273 return None

	274

	275 if ((e.code < 500 and not (retry_404 and e.code == 404)) or

	276 (e.code >= 500 and not retry_50x)):

	277 # This HTTPError means we reached the server and there was a problem

	278 # with the request, so don't retry.

	279 logging.error(

	280 'Able to connect to %s but an exception was thrown.\n%s',

	281 request.get_full_url(), self._format_exception(e, verbose=True))

	282 return None

	283

	284 # The HTTPError was due to a server error, so retry the attempt.

	285 logging.warning('Able to connect to %s on attempt %d.\n%s',

	286 request.get_full_url(), attempt,

	287 self._format_exception(e))

	288 last_error = e

	289

	290 except (urllib2.URLError, httplib.HTTPException,

	291 socket.timeout, ssl.SSLError) as e:

	292 logging.warning('Unable to open url %s on attempt %d.\n%s',

	293 request.get_full_url(), attempt,

	294 self._format_exception(e))

	295 last_error = e

	296

	297 # Only sleep if we are going to try again.

	298 if max_attempts and attempt != max_attempts:

	299 remaining = None

	300 if timeout:

	301 remaining = timeout - (self._now() - start)

	302 if remaining <= 0:

	303 break

	304 self.sleep_before_retry(attempt, remaining)

	305

	306 logging.error('Unable to open given url, %s, after %d attempts.\n%s',

	307 request.get_full_url(), max_attempts,

	308 self._format_exception(last_error, verbose=True))

	309 return None

	310

	311 def _url_open(self, request, timeout=None):

	312 """Low level method to execute urllib2.Request's.

	313

	314 To be mocked in tests.

	315 """

	316 if timeout is not None:

	317 return self.opener.open(request, timeout=timeout)

	318 else:

	319 # Leave original default value for \|timeout\|. It's nontrivial.

	320 return self.opener.open(request)

	321

	322 @staticmethod

	323 def _now():

	324 """To be mocked in tests."""

	325 return time.time()

	326

	327 @staticmethod

	328 def calculate_sleep_before_retry(attempt, max_duration):

	329 # Maximum sleeping time. We're hammering a cloud-distributed service, it'll

	330 # survive.

	331 MAX_SLEEP = 10.

	332 # random.random() returns [0.0, 1.0). Starts with relatively short waiting

	333 # time by starting with 1.5/2+1.5^-1 median offset.

	334 duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))

	335 assert duration > 0.1

	336 duration = min(MAX_SLEEP, duration)

	337 if max_duration:

	338 duration = min(max_duration, duration)

	339 return duration

	340

	341 @classmethod

	342 def sleep_before_retry(cls, attempt, max_duration):

	343 """Sleeps for some amount of time when retrying the request.

	344

	345 To be mocked in tests.

	346 """

	347 time.sleep(cls.calculate_sleep_before_retry(attempt, max_duration))

	348

	349 @staticmethod

	350 def _format_exception(exc, verbose=False):

	351 """Given an instance of some exception raised by urlopen returns human

	352 readable piece of text with detailed information about the error.

	353 """

	354 out = ['Exception: %s' % (exc,)]

	355 if verbose:

	356 if isinstance(exc, urllib2.HTTPError):

	357 out.append('-' * 10)

	358 if exc.hdrs:

	359 for header, value in exc.hdrs.items():

	360 if not header.startswith('x-'):

	361 out.append('%s: %s' % (header.capitalize(), value))

	362 out.append('')

	363 out.append(exc.read() or '<empty body>')

	364 out.append('-' * 10)

	365 return '\n'.join(out)

	366

	367

	368 class HttpResponse(object):

	369 """Response from HttpService."""

	370

	371 def __init__(self, stream, url, headers):

	372 self._stream = stream

	373 self._url = url

	374 self._headers = headers

	375 self._read = 0

	376

	377 @property

	378 def content_length(self):

	379 """Total length to the response or None if not known in advance."""

	380 length = self._headers.get('Content-Length')

	381 return int(length) if length is not None else None

	382

	383 def read(self, size=None):

	384 """Reads up to \|size\| bytes from the stream and returns them.

	385

	386 If \|size\| is None reads all available bytes.

	387

	388 Raises TimeoutError on read timeout.

	389 """

	390 try:

	391 # cStringIO has a bug: stream.read(None) is not the same as stream.read().

	392 data = self._stream.read() if size is None else self._stream.read(size)

	393 self._read += len(data)

	394 return data

	395 except (socket.timeout, ssl.SSLError) as e:

	396 logging.error('Timeout while reading from %s, read %d of %s: %s',

	397 self._url, self._read, self.content_length, e)

	398 raise TimeoutError(e)

	399

	400 @classmethod

	401 def get_fake_response(cls, content, url):

	402 """Returns HttpResponse with predefined content, useful in tests."""

	403 return cls(StringIO.StringIO(content),

	404 url, {'content-length': len(content)})

	405

	406

	407

	408 class AppEngineService(HttpService):

	409 """This class implements authentication support for

	410 an app engine based services.

	411 """

	412

	413 # This lock ensures that user won't be confused with multiple concurrent

	414 # login prompts.

	415 _auth_lock = threading.Lock()

	416

	417 def __init__(self, urlhost, email=None, password=None):

	418 super(AppEngineService, self).__init__(urlhost)

	419 self.email = email

	420 self.password = password

	421 self._keyring = None

	422

	423 def authenticate(self):

	424 """Authenticates in the app engine application.

	425 Returns True on success.

	426 """

	427 if not upload:

	428 logging.error('\'upload\' module is missing, '

	429 'app engine authentication is disabled.')

	430 return False

	431 cookie_jar = self.cookie_jar

	432 save_cookie_jar = self.save_cookie_jar

	433 # RPC server that uses AuthenticationSupport's cookie jar.

	434 class AuthServer(upload.AbstractRpcServer):

	435 def _GetOpener(self):

	436 # Authentication code needs to know about 302 response.

	437 # So make OpenerDirector without HTTPRedirectHandler.

	438 opener = urllib2.OpenerDirector()

	439 opener.add_handler(urllib2.ProxyHandler())

	440 opener.add_handler(urllib2.UnknownHandler())

	441 opener.add_handler(urllib2.HTTPHandler())

	442 opener.add_handler(urllib2.HTTPDefaultErrorHandler())

	443 opener.add_handler(urllib2.HTTPSHandler())

	444 opener.add_handler(urllib2.HTTPErrorProcessor())

	445 opener.add_handler(urllib2.HTTPCookieProcessor(cookie_jar))

	446 return opener

	447 def PerformAuthentication(self):

	448 self._Authenticate()

	449 save_cookie_jar()

	450 return self.authenticated

	451 with AppEngineService._auth_lock:

	452 rpc_server = AuthServer(self.urlhost, self.get_credentials)

	453 return rpc_server.PerformAuthentication()

	454

	455 def get_credentials(self):

	456 """Called during authentication process to get the credentials.

	457 May be called mutliple times if authentication fails.

	458 Returns tuple (email, password).

	459 """

	460 # 'authenticate' calls this only if 'upload' is present.

	461 # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.

	462 assert upload, '\'upload\' module is required for this to work'

	463 if self.email and self.password:

	464 return (self.email, self.password)

	465 if not self._keyring:

	466 self._keyring = upload.KeyringCreds(self.urlhost,

	467 self.urlhost,

	468 self.email)

	469 return self._keyring.GetUserCredentials()

	470

	471

	472 class ThreadSafeCookieJar(cookielib.MozillaCookieJar):

	473 """MozillaCookieJar with thread safe load and save."""

	474

	475 def load(self, filename=None, ignore_discard=False, ignore_expires=False):

	476 """Loads cookies from the file if it exists."""

	477 filename = os.path.expanduser(filename or self.filename)

	478 with self._cookies_lock:

	479 if os.path.exists(filename):

	480 try:

	481 cookielib.MozillaCookieJar.load(self, filename,

	482 ignore_discard,

	483 ignore_expires)

	484 logging.debug('Loaded cookies from %s', filename)

	485 except (cookielib.LoadError, IOError):

	486 pass

	487 else:

	488 try:

	489 fd = os.open(filename, os.O_CREAT, 0600)

	490 os.close(fd)

	491 except OSError:

	492 logging.error('Failed to create %s', filename)

	493 try:

	494 os.chmod(filename, 0600)

	495 except OSError:

	496 logging.error('Failed to fix mode for %s', filename)

	497

	498 def save(self, filename=None, ignore_discard=False, ignore_expires=False):

	499 """Saves cookies to the file, completely overwriting it."""

	500 logging.debug('Saving cookies to %s', filename or self.filename)

	501 with self._cookies_lock:

	502 try:

	503 cookielib.MozillaCookieJar.save(self, filename,

	504 ignore_discard,

	505 ignore_expires)

	506 except OSError:

	507 logging.error('Failed to save %s', filename)

OLD	NEW

« no previous file with comments | « tests/url_open_timeout_test.py ('k') | no next file » | no next file with comments »