OLD | NEW |
(Empty) | |
| 1 from __future__ import generators |
| 2 """ |
| 3 httplib2 |
| 4 |
| 5 A caching http interface that supports ETags and gzip |
| 6 to conserve bandwidth. |
| 7 |
| 8 Requires Python 2.3 or later |
| 9 |
| 10 Changelog: |
| 11 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed. |
| 12 |
| 13 """ |
| 14 |
| 15 __author__ = "Joe Gregorio (joe@bitworking.org)" |
| 16 __copyright__ = "Copyright 2006, Joe Gregorio" |
| 17 __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)", |
| 18 "James Antill", |
| 19 "Xavier Verges Farrero", |
| 20 "Jonathan Feinberg", |
| 21 "Blair Zajac", |
| 22 "Sam Ruby", |
| 23 "Louis Nyffenegger"] |
| 24 __license__ = "MIT" |
| 25 __version__ = "0.7.5" |
| 26 |
| 27 import re |
| 28 import sys |
| 29 import email |
| 30 import email.Utils |
| 31 import email.Message |
| 32 import email.FeedParser |
| 33 import StringIO |
| 34 import gzip |
| 35 import zlib |
| 36 import httplib |
| 37 import urlparse |
| 38 import urllib |
| 39 import base64 |
| 40 import os |
| 41 import copy |
| 42 import calendar |
| 43 import time |
| 44 import random |
| 45 import errno |
| 46 try: |
| 47 from hashlib import sha1 as _sha, md5 as _md5 |
| 48 except ImportError: |
| 49 # prior to Python 2.5, these were separate modules |
| 50 import sha |
| 51 import md5 |
| 52 _sha = sha.new |
| 53 _md5 = md5.new |
| 54 import hmac |
| 55 from gettext import gettext as _ |
| 56 import socket |
| 57 |
| 58 try: |
| 59 from httplib2 import socks |
| 60 except ImportError: |
| 61 try: |
| 62 import socks |
| 63 except ImportError: |
| 64 socks = None |
| 65 |
| 66 # Build the appropriate socket wrapper for ssl |
| 67 try: |
| 68 import ssl # python 2.6 |
| 69 ssl_SSLError = ssl.SSLError |
| 70 def _ssl_wrap_socket(sock, key_file, cert_file, |
| 71 disable_validation, ca_certs): |
| 72 if disable_validation: |
| 73 cert_reqs = ssl.CERT_NONE |
| 74 else: |
| 75 cert_reqs = ssl.CERT_REQUIRED |
| 76 # We should be specifying SSL version 3 or TLS v1, but the ssl module |
| 77 # doesn't expose the necessary knobs. So we need to go with the default |
| 78 # of SSLv23. |
| 79 return ssl.wrap_socket(sock, keyfile=key_file, certfile=cert_file, |
| 80 cert_reqs=cert_reqs, ca_certs=ca_certs) |
| 81 except (AttributeError, ImportError): |
| 82 ssl_SSLError = None |
| 83 def _ssl_wrap_socket(sock, key_file, cert_file, |
| 84 disable_validation, ca_certs): |
| 85 if not disable_validation: |
| 86 raise CertificateValidationUnsupported( |
| 87 "SSL certificate validation is not supported without " |
| 88 "the ssl module installed. To avoid this error, install " |
| 89 "the ssl module, or explicity disable validation.") |
| 90 ssl_sock = socket.ssl(sock, key_file, cert_file) |
| 91 return httplib.FakeSocket(sock, ssl_sock) |
| 92 |
| 93 |
| 94 if sys.version_info >= (2,3): |
| 95 from iri2uri import iri2uri |
| 96 else: |
| 97 def iri2uri(uri): |
| 98 return uri |
| 99 |
| 100 def has_timeout(timeout): # python 2.6 |
| 101 if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'): |
| 102 return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TI
MEOUT) |
| 103 return (timeout is not None) |
| 104 |
| 105 __all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error', |
| 106 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent', |
| 107 'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError'
, |
| 108 'debuglevel', 'ProxiesUnavailableError'] |
| 109 |
| 110 |
| 111 # The httplib debug level, set to a non-zero value to get debug output |
| 112 debuglevel = 0 |
| 113 |
| 114 # A request will be tried 'RETRIES' times if it fails at the socket/connection l
evel. |
| 115 RETRIES = 2 |
| 116 |
| 117 # Python 2.3 support |
| 118 if sys.version_info < (2,4): |
| 119 def sorted(seq): |
| 120 seq.sort() |
| 121 return seq |
| 122 |
| 123 # Python 2.3 support |
| 124 def HTTPResponse__getheaders(self): |
| 125 """Return list of (header, value) tuples.""" |
| 126 if self.msg is None: |
| 127 raise httplib.ResponseNotReady() |
| 128 return self.msg.items() |
| 129 |
| 130 if not hasattr(httplib.HTTPResponse, 'getheaders'): |
| 131 httplib.HTTPResponse.getheaders = HTTPResponse__getheaders |
| 132 |
| 133 # All exceptions raised here derive from HttpLib2Error |
| 134 class HttpLib2Error(Exception): pass |
| 135 |
| 136 # Some exceptions can be caught and optionally |
| 137 # be turned back into responses. |
| 138 class HttpLib2ErrorWithResponse(HttpLib2Error): |
| 139 def __init__(self, desc, response, content): |
| 140 self.response = response |
| 141 self.content = content |
| 142 HttpLib2Error.__init__(self, desc) |
| 143 |
| 144 class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass |
| 145 class RedirectLimit(HttpLib2ErrorWithResponse): pass |
| 146 class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass |
| 147 class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass |
| 148 class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass |
| 149 |
| 150 class MalformedHeader(HttpLib2Error): pass |
| 151 class RelativeURIError(HttpLib2Error): pass |
| 152 class ServerNotFoundError(HttpLib2Error): pass |
| 153 class ProxiesUnavailableError(HttpLib2Error): pass |
| 154 class CertificateValidationUnsupported(HttpLib2Error): pass |
| 155 class SSLHandshakeError(HttpLib2Error): pass |
| 156 class NotSupportedOnThisPlatform(HttpLib2Error): pass |
| 157 class CertificateHostnameMismatch(SSLHandshakeError): |
| 158 def __init__(self, desc, host, cert): |
| 159 HttpLib2Error.__init__(self, desc) |
| 160 self.host = host |
| 161 self.cert = cert |
| 162 |
| 163 # Open Items: |
| 164 # ----------- |
| 165 # Proxy support |
| 166 |
| 167 # Are we removing the cached content too soon on PUT (only delete on 200 Maybe?) |
| 168 |
| 169 # Pluggable cache storage (supports storing the cache in |
| 170 # flat files by default. We need a plug-in architecture |
| 171 # that can support Berkeley DB and Squid) |
| 172 |
| 173 # == Known Issues == |
| 174 # Does not handle a resource that uses conneg and Last-Modified but no ETag as a
cache validator. |
| 175 # Does not handle Cache-Control: max-stale |
| 176 # Does not use Age: headers when calculating cache freshness. |
| 177 |
| 178 |
| 179 # The number of redirections to follow before giving up. |
| 180 # Note that only GET redirects are automatically followed. |
| 181 # Will also honor 301 requests by saving that info and never |
| 182 # requesting that URI again. |
| 183 DEFAULT_MAX_REDIRECTS = 5 |
| 184 |
| 185 # Default CA certificates file bundled with httplib2. |
| 186 CA_CERTS = os.path.join( |
| 187 os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt") |
| 188 |
| 189 # Which headers are hop-by-hop headers by default |
| 190 HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authoriza
tion', 'te', 'trailers', 'transfer-encoding', 'upgrade'] |
| 191 |
| 192 def _get_end2end_headers(response): |
| 193 hopbyhop = list(HOP_BY_HOP) |
| 194 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(','
)]) |
| 195 return [header for header in response.keys() if header not in hopbyhop] |
| 196 |
| 197 URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") |
| 198 |
| 199 def parse_uri(uri): |
| 200 """Parses a URI using the regex given in Appendix B of RFC 3986. |
| 201 |
| 202 (scheme, authority, path, query, fragment) = parse_uri(uri) |
| 203 """ |
| 204 groups = URI.match(uri).groups() |
| 205 return (groups[1], groups[3], groups[4], groups[6], groups[8]) |
| 206 |
| 207 def urlnorm(uri): |
| 208 (scheme, authority, path, query, fragment) = parse_uri(uri) |
| 209 if not scheme or not authority: |
| 210 raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri) |
| 211 authority = authority.lower() |
| 212 scheme = scheme.lower() |
| 213 if not path: |
| 214 path = "/" |
| 215 # Could do syntax based normalization of the URI before |
| 216 # computing the digest. See Section 6.2.2 of Std 66. |
| 217 request_uri = query and "?".join([path, query]) or path |
| 218 scheme = scheme.lower() |
| 219 defrag_uri = scheme + "://" + authority + request_uri |
| 220 return scheme, authority, request_uri, defrag_uri |
| 221 |
| 222 |
| 223 # Cache filename construction (original borrowed from Venus http://intertwingly.
net/code/venus/) |
| 224 re_url_scheme = re.compile(r'^\w+://') |
| 225 re_slash = re.compile(r'[?/:|]+') |
| 226 |
| 227 def safename(filename): |
| 228 """Return a filename suitable for the cache. |
| 229 |
| 230 Strips dangerous and common characters to create a filename we |
| 231 can use to store the cache in. |
| 232 """ |
| 233 |
| 234 try: |
| 235 if re_url_scheme.match(filename): |
| 236 if isinstance(filename,str): |
| 237 filename = filename.decode('utf-8') |
| 238 filename = filename.encode('idna') |
| 239 else: |
| 240 filename = filename.encode('idna') |
| 241 except UnicodeError: |
| 242 pass |
| 243 if isinstance(filename,unicode): |
| 244 filename=filename.encode('utf-8') |
| 245 filemd5 = _md5(filename).hexdigest() |
| 246 filename = re_url_scheme.sub("", filename) |
| 247 filename = re_slash.sub(",", filename) |
| 248 |
| 249 # limit length of filename |
| 250 if len(filename)>200: |
| 251 filename=filename[:200] |
| 252 return ",".join((filename, filemd5)) |
| 253 |
| 254 NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+') |
| 255 def _normalize_headers(headers): |
| 256 return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (k
ey, value) in headers.iteritems()]) |
| 257 |
| 258 def _parse_cache_control(headers): |
| 259 retval = {} |
| 260 if headers.has_key('cache-control'): |
| 261 parts = headers['cache-control'].split(',') |
| 262 parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]
) for part in parts if -1 != part.find("=")] |
| 263 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == na
me.find("=")] |
| 264 retval = dict(parts_with_args + parts_wo_args) |
| 265 return retval |
| 266 |
| 267 # Whether to use a strict mode to parse WWW-Authenticate headers |
| 268 # Might lead to bad results in case of ill-formed header value, |
| 269 # so disabled by default, falling back to relaxed parsing. |
| 270 # Set to true to turn on, usefull for testing servers. |
| 271 USE_WWW_AUTH_STRICT_PARSING = 0 |
| 272 |
| 273 # In regex below: |
| 274 # [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" a
s defined by HTTP |
| 275 # "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-s
tring" as defined by HTTP, when LWS have already been replaced by a single space |
| 276 # Actually, as an auth-param value can be either a token or a quoted-string, the
y are combined in a single pattern which matches both: |
| 277 # \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08
\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"? |
| 278 WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[
\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?
(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$") |
| 279 WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")
(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$") |
| 280 UNQUOTE_PAIRS = re.compile(r'\\(.)') |
| 281 def _parse_www_authenticate(headers, headername='www-authenticate'): |
| 282 """Returns a dictionary of dictionaries, one dict |
| 283 per auth_scheme.""" |
| 284 retval = {} |
| 285 if headers.has_key(headername): |
| 286 try: |
| 287 authenticate = headers[headername].strip() |
| 288 www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH
_RELAXED |
| 289 while authenticate: |
| 290 # Break off the scheme at the beginning of the line |
| 291 if headername == 'authentication-info': |
| 292 (auth_scheme, the_rest) = ('digest', authenticate) |
| 293 else: |
| 294 (auth_scheme, the_rest) = authenticate.split(" ", 1) |
| 295 # Now loop over all the key value pairs that come after the scheme
, |
| 296 # being careful not to roll into the next scheme |
| 297 match = www_auth.search(the_rest) |
| 298 auth_params = {} |
| 299 while match: |
| 300 if match and len(match.groups()) == 3: |
| 301 (key, value, the_rest) = match.groups() |
| 302 auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value)
# '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) |
| 303 match = www_auth.search(the_rest) |
| 304 retval[auth_scheme.lower()] = auth_params |
| 305 authenticate = the_rest.strip() |
| 306 except ValueError: |
| 307 raise MalformedHeader("WWW-Authenticate") |
| 308 return retval |
| 309 |
| 310 |
| 311 def _entry_disposition(response_headers, request_headers): |
| 312 """Determine freshness from the Date, Expires and Cache-Control headers. |
| 313 |
| 314 We don't handle the following: |
| 315 |
| 316 1. Cache-Control: max-stale |
| 317 2. Age: headers are not used in the calculations. |
| 318 |
| 319 Not that this algorithm is simpler than you might think |
| 320 because we are operating as a private (non-shared) cache. |
| 321 This lets us ignore 's-maxage'. We can also ignore |
| 322 'proxy-invalidate' since we aren't a proxy. |
| 323 We will never return a stale document as |
| 324 fresh as a design decision, and thus the non-implementation |
| 325 of 'max-stale'. This also lets us safely ignore 'must-revalidate' |
| 326 since we operate as if every server has sent 'must-revalidate'. |
| 327 Since we are private we get to ignore both 'public' and |
| 328 'private' parameters. We also ignore 'no-transform' since |
| 329 we don't do any transformations. |
| 330 The 'no-store' parameter is handled at a higher level. |
| 331 So the only Cache-Control parameters we look at are: |
| 332 |
| 333 no-cache |
| 334 only-if-cached |
| 335 max-age |
| 336 min-fresh |
| 337 """ |
| 338 |
| 339 retval = "STALE" |
| 340 cc = _parse_cache_control(request_headers) |
| 341 cc_response = _parse_cache_control(response_headers) |
| 342 |
| 343 if request_headers.has_key('pragma') and request_headers['pragma'].lower().f
ind('no-cache') != -1: |
| 344 retval = "TRANSPARENT" |
| 345 if 'cache-control' not in request_headers: |
| 346 request_headers['cache-control'] = 'no-cache' |
| 347 elif cc.has_key('no-cache'): |
| 348 retval = "TRANSPARENT" |
| 349 elif cc_response.has_key('no-cache'): |
| 350 retval = "STALE" |
| 351 elif cc.has_key('only-if-cached'): |
| 352 retval = "FRESH" |
| 353 elif response_headers.has_key('date'): |
| 354 date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']
)) |
| 355 now = time.time() |
| 356 current_age = max(0, now - date) |
| 357 if cc_response.has_key('max-age'): |
| 358 try: |
| 359 freshness_lifetime = int(cc_response['max-age']) |
| 360 except ValueError: |
| 361 freshness_lifetime = 0 |
| 362 elif response_headers.has_key('expires'): |
| 363 expires = email.Utils.parsedate_tz(response_headers['expires']) |
| 364 if None == expires: |
| 365 freshness_lifetime = 0 |
| 366 else: |
| 367 freshness_lifetime = max(0, calendar.timegm(expires) - date) |
| 368 else: |
| 369 freshness_lifetime = 0 |
| 370 if cc.has_key('max-age'): |
| 371 try: |
| 372 freshness_lifetime = int(cc['max-age']) |
| 373 except ValueError: |
| 374 freshness_lifetime = 0 |
| 375 if cc.has_key('min-fresh'): |
| 376 try: |
| 377 min_fresh = int(cc['min-fresh']) |
| 378 except ValueError: |
| 379 min_fresh = 0 |
| 380 current_age += min_fresh |
| 381 if freshness_lifetime > current_age: |
| 382 retval = "FRESH" |
| 383 return retval |
| 384 |
| 385 def _decompressContent(response, new_content): |
| 386 content = new_content |
| 387 try: |
| 388 encoding = response.get('content-encoding', None) |
| 389 if encoding in ['gzip', 'deflate']: |
| 390 if encoding == 'gzip': |
| 391 content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).
read() |
| 392 if encoding == 'deflate': |
| 393 content = zlib.decompress(content) |
| 394 response['content-length'] = str(len(content)) |
| 395 # Record the historical presence of the encoding in a way the won't
interfere. |
| 396 response['-content-encoding'] = response['content-encoding'] |
| 397 del response['content-encoding'] |
| 398 except IOError: |
| 399 content = "" |
| 400 raise FailedToDecompressContent(_("Content purported to be compressed wi
th %s but failed to decompress.") % response.get('content-encoding'), response,
content) |
| 401 return content |
| 402 |
| 403 def _updateCache(request_headers, response_headers, content, cache, cachekey): |
| 404 if cachekey: |
| 405 cc = _parse_cache_control(request_headers) |
| 406 cc_response = _parse_cache_control(response_headers) |
| 407 if cc.has_key('no-store') or cc_response.has_key('no-store'): |
| 408 cache.delete(cachekey) |
| 409 else: |
| 410 info = email.Message.Message() |
| 411 for key, value in response_headers.iteritems(): |
| 412 if key not in ['status','content-encoding','transfer-encoding']: |
| 413 info[key] = value |
| 414 |
| 415 # Add annotations to the cache to indicate what headers |
| 416 # are variant for this request. |
| 417 vary = response_headers.get('vary', None) |
| 418 if vary: |
| 419 vary_headers = vary.lower().replace(' ', '').split(',') |
| 420 for header in vary_headers: |
| 421 key = '-varied-%s' % header |
| 422 try: |
| 423 info[key] = request_headers[header] |
| 424 except KeyError: |
| 425 pass |
| 426 |
| 427 status = response_headers.status |
| 428 if status == 304: |
| 429 status = 200 |
| 430 |
| 431 status_header = 'status: %d\r\n' % status |
| 432 |
| 433 header_str = info.as_string() |
| 434 |
| 435 header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str) |
| 436 text = "".join([status_header, header_str, content]) |
| 437 |
| 438 cache.set(cachekey, text) |
| 439 |
| 440 def _cnonce(): |
| 441 dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] fo
r i in range(20)])).hexdigest() |
| 442 return dig[:16] |
| 443 |
| 444 def _wsse_username_token(cnonce, iso_now, password): |
| 445 return base64.b64encode(_sha("%s%s%s" % (cnonce, iso_now, password)).digest(
)).strip() |
| 446 |
| 447 |
| 448 # For credentials we need two things, first |
| 449 # a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) |
| 450 # Then we also need a list of URIs that have already demanded authentication |
| 451 # That list is tricky since sub-URIs can take the same auth, or the |
| 452 # auth scheme may change as you descend the tree. |
| 453 # So we also need each Auth instance to be able to tell us |
| 454 # how close to the 'top' it is. |
| 455 |
| 456 class Authentication(object): |
| 457 def __init__(self, credentials, host, request_uri, headers, response, conten
t, http): |
| 458 (scheme, authority, path, query, fragment) = parse_uri(request_uri) |
| 459 self.path = path |
| 460 self.host = host |
| 461 self.credentials = credentials |
| 462 self.http = http |
| 463 |
| 464 def depth(self, request_uri): |
| 465 (scheme, authority, path, query, fragment) = parse_uri(request_uri) |
| 466 return request_uri[len(self.path):].count("/") |
| 467 |
| 468 def inscope(self, host, request_uri): |
| 469 # XXX Should we normalize the request_uri? |
| 470 (scheme, authority, path, query, fragment) = parse_uri(request_uri) |
| 471 return (host == self.host) and path.startswith(self.path) |
| 472 |
| 473 def request(self, method, request_uri, headers, content): |
| 474 """Modify the request headers to add the appropriate |
| 475 Authorization header. Over-ride this in sub-classes.""" |
| 476 pass |
| 477 |
| 478 def response(self, response, content): |
| 479 """Gives us a chance to update with new nonces |
| 480 or such returned from the last authorized response. |
| 481 Over-rise this in sub-classes if necessary. |
| 482 |
| 483 Return TRUE is the request is to be retried, for |
| 484 example Digest may return stale=true. |
| 485 """ |
| 486 return False |
| 487 |
| 488 |
| 489 |
| 490 class BasicAuthentication(Authentication): |
| 491 def __init__(self, credentials, host, request_uri, headers, response, conten
t, http): |
| 492 Authentication.__init__(self, credentials, host, request_uri, headers, r
esponse, content, http) |
| 493 |
| 494 def request(self, method, request_uri, headers, content): |
| 495 """Modify the request headers to add the appropriate |
| 496 Authorization header.""" |
| 497 headers['authorization'] = 'Basic ' + base64.b64encode("%s:%s" % self.cr
edentials).strip() |
| 498 |
| 499 |
| 500 class DigestAuthentication(Authentication): |
| 501 """Only do qop='auth' and MD5, since that |
| 502 is all Apache currently implements""" |
| 503 def __init__(self, credentials, host, request_uri, headers, response, conten
t, http): |
| 504 Authentication.__init__(self, credentials, host, request_uri, headers, r
esponse, content, http) |
| 505 challenge = _parse_www_authenticate(response, 'www-authenticate') |
| 506 self.challenge = challenge['digest'] |
| 507 qop = self.challenge.get('qop', 'auth') |
| 508 self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and
'auth' or None |
| 509 if self.challenge['qop'] is None: |
| 510 raise UnimplementedDigestAuthOptionError( _("Unsupported value for q
op: %s." % qop)) |
| 511 self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upp
er() |
| 512 if self.challenge['algorithm'] != 'MD5': |
| 513 raise UnimplementedDigestAuthOptionError( _("Unsupported value for a
lgorithm: %s." % self.challenge['algorithm'])) |
| 514 self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":
", self.credentials[1]]) |
| 515 self.challenge['nc'] = 1 |
| 516 |
| 517 def request(self, method, request_uri, headers, content, cnonce = None): |
| 518 """Modify the request headers""" |
| 519 H = lambda x: _md5(x).hexdigest() |
| 520 KD = lambda s, d: H("%s:%s" % (s, d)) |
| 521 A2 = "".join([method, ":", request_uri]) |
| 522 self.challenge['cnonce'] = cnonce or _cnonce() |
| 523 request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.chall
enge['nonce'], |
| 524 '%08x' % self.challenge['nc'], |
| 525 self.challenge['cnonce'], |
| 526 self.challenge['qop'], H(A2) |
| 527 )) |
| 528 headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s"
, uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % ( |
| 529 self.credentials[0], |
| 530 self.challenge['realm'], |
| 531 self.challenge['nonce'], |
| 532 request_uri, |
| 533 self.challenge['algorithm'], |
| 534 request_digest, |
| 535 self.challenge['qop'], |
| 536 self.challenge['nc'], |
| 537 self.challenge['cnonce'], |
| 538 ) |
| 539 if self.challenge.get('opaque'): |
| 540 headers['authorization'] += ', opaque="%s"' % self.challenge['opaque
'] |
| 541 self.challenge['nc'] += 1 |
| 542 |
| 543 def response(self, response, content): |
| 544 if not response.has_key('authentication-info'): |
| 545 challenge = _parse_www_authenticate(response, 'www-authenticate').ge
t('digest', {}) |
| 546 if 'true' == challenge.get('stale'): |
| 547 self.challenge['nonce'] = challenge['nonce'] |
| 548 self.challenge['nc'] = 1 |
| 549 return True |
| 550 else: |
| 551 updated_challenge = _parse_www_authenticate(response, 'authenticatio
n-info').get('digest', {}) |
| 552 |
| 553 if updated_challenge.has_key('nextnonce'): |
| 554 self.challenge['nonce'] = updated_challenge['nextnonce'] |
| 555 self.challenge['nc'] = 1 |
| 556 return False |
| 557 |
| 558 |
| 559 class HmacDigestAuthentication(Authentication): |
| 560 """Adapted from Robert Sayre's code and DigestAuthentication above.""" |
| 561 __author__ = "Thomas Broyer (t.broyer@ltgt.net)" |
| 562 |
| 563 def __init__(self, credentials, host, request_uri, headers, response, conten
t, http): |
| 564 Authentication.__init__(self, credentials, host, request_uri, headers, r
esponse, content, http) |
| 565 challenge = _parse_www_authenticate(response, 'www-authenticate') |
| 566 self.challenge = challenge['hmacdigest'] |
| 567 # TODO: self.challenge['domain'] |
| 568 self.challenge['reason'] = self.challenge.get('reason', 'unauthorized') |
| 569 if self.challenge['reason'] not in ['unauthorized', 'integrity']: |
| 570 self.challenge['reason'] = 'unauthorized' |
| 571 self.challenge['salt'] = self.challenge.get('salt', '') |
| 572 if not self.challenge.get('snonce'): |
| 573 raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn
't contain a server nonce, or this one is empty.")) |
| 574 self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-
1') |
| 575 if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']: |
| 576 raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value f
or algorithm: %s." % self.challenge['algorithm'])) |
| 577 self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA
-1') |
| 578 if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']: |
| 579 raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value f
or pw-algorithm: %s." % self.challenge['pw-algorithm'])) |
| 580 if self.challenge['algorithm'] == 'HMAC-MD5': |
| 581 self.hashmod = _md5 |
| 582 else: |
| 583 self.hashmod = _sha |
| 584 if self.challenge['pw-algorithm'] == 'MD5': |
| 585 self.pwhashmod = _md5 |
| 586 else: |
| 587 self.pwhashmod = _sha |
| 588 self.key = "".join([self.credentials[0], ":", |
| 589 self.pwhashmod.new("".join([self.credentials[1], self.challe
nge['salt']])).hexdigest().lower(), |
| 590 ":", self.challenge['realm'] |
| 591 ]) |
| 592 self.key = self.pwhashmod.new(self.key).hexdigest().lower() |
| 593 |
| 594 def request(self, method, request_uri, headers, content): |
| 595 """Modify the request headers""" |
| 596 keys = _get_end2end_headers(headers) |
| 597 keylist = "".join(["%s " % k for k in keys]) |
| 598 headers_val = "".join([headers[k] for k in keys]) |
| 599 created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime()) |
| 600 cnonce = _cnonce() |
| 601 request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.c
hallenge['snonce'], headers_val) |
| 602 request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdi
gest().lower() |
| 603 headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce
="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % ( |
| 604 self.credentials[0], |
| 605 self.challenge['realm'], |
| 606 self.challenge['snonce'], |
| 607 cnonce, |
| 608 request_uri, |
| 609 created, |
| 610 request_digest, |
| 611 keylist, |
| 612 ) |
| 613 |
| 614 def response(self, response, content): |
| 615 challenge = _parse_www_authenticate(response, 'www-authenticate').get('h
macdigest', {}) |
| 616 if challenge.get('reason') in ['integrity', 'stale']: |
| 617 return True |
| 618 return False |
| 619 |
| 620 |
| 621 class WsseAuthentication(Authentication): |
| 622 """This is thinly tested and should not be relied upon. |
| 623 At this time there isn't any third party server to test against. |
| 624 Blogger and TypePad implemented this algorithm at one point |
| 625 but Blogger has since switched to Basic over HTTPS and |
| 626 TypePad has implemented it wrong, by never issuing a 401 |
| 627 challenge but instead requiring your client to telepathically know that |
| 628 their endpoint is expecting WSSE profile="UsernameToken".""" |
| 629 def __init__(self, credentials, host, request_uri, headers, response, conten
t, http): |
| 630 Authentication.__init__(self, credentials, host, request_uri, headers, r
esponse, content, http) |
| 631 |
| 632 def request(self, method, request_uri, headers, content): |
| 633 """Modify the request headers to add the appropriate |
| 634 Authorization header.""" |
| 635 headers['authorization'] = 'WSSE profile="UsernameToken"' |
| 636 iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) |
| 637 cnonce = _cnonce() |
| 638 password_digest = _wsse_username_token(cnonce, iso_now, self.credentials
[1]) |
| 639 headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", N
once="%s", Created="%s"' % ( |
| 640 self.credentials[0], |
| 641 password_digest, |
| 642 cnonce, |
| 643 iso_now) |
| 644 |
| 645 class GoogleLoginAuthentication(Authentication): |
| 646 def __init__(self, credentials, host, request_uri, headers, response, conten
t, http): |
| 647 from urllib import urlencode |
| 648 Authentication.__init__(self, credentials, host, request_uri, headers, r
esponse, content, http) |
| 649 challenge = _parse_www_authenticate(response, 'www-authenticate') |
| 650 service = challenge['googlelogin'].get('service', 'xapi') |
| 651 # Bloggger actually returns the service in the challenge |
| 652 # For the rest we guess based on the URI |
| 653 if service == 'xapi' and request_uri.find("calendar") > 0: |
| 654 service = "cl" |
| 655 # No point in guessing Base or Spreadsheet |
| 656 #elif request_uri.find("spreadsheets") > 0: |
| 657 # service = "wise" |
| 658 |
| 659 auth = dict(Email=credentials[0], Passwd=credentials[1], service=service
, source=headers['user-agent']) |
| 660 resp, content = self.http.request("https://www.google.com/accounts/Clien
tLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'applicat
ion/x-www-form-urlencoded'}) |
| 661 lines = content.split('\n') |
| 662 d = dict([tuple(line.split("=", 1)) for line in lines if line]) |
| 663 if resp.status == 403: |
| 664 self.Auth = "" |
| 665 else: |
| 666 self.Auth = d['Auth'] |
| 667 |
| 668 def request(self, method, request_uri, headers, content): |
| 669 """Modify the request headers to add the appropriate |
| 670 Authorization header.""" |
| 671 headers['authorization'] = 'GoogleLogin Auth=' + self.Auth |
| 672 |
| 673 |
| 674 AUTH_SCHEME_CLASSES = { |
| 675 "basic": BasicAuthentication, |
| 676 "wsse": WsseAuthentication, |
| 677 "digest": DigestAuthentication, |
| 678 "hmacdigest": HmacDigestAuthentication, |
| 679 "googlelogin": GoogleLoginAuthentication |
| 680 } |
| 681 |
| 682 AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] |
| 683 |
| 684 class FileCache(object): |
| 685 """Uses a local directory as a store for cached files. |
| 686 Not really safe to use if multiple threads or processes are going to |
| 687 be running on the same cache. |
| 688 """ |
| 689 def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).he
xdigest() for the old behavior |
| 690 self.cache = cache |
| 691 self.safe = safe |
| 692 if not os.path.exists(cache): |
| 693 os.makedirs(self.cache) |
| 694 |
| 695 def get(self, key): |
| 696 retval = None |
| 697 cacheFullPath = os.path.join(self.cache, self.safe(key)) |
| 698 try: |
| 699 f = file(cacheFullPath, "rb") |
| 700 retval = f.read() |
| 701 f.close() |
| 702 except IOError: |
| 703 pass |
| 704 return retval |
| 705 |
| 706 def set(self, key, value): |
| 707 cacheFullPath = os.path.join(self.cache, self.safe(key)) |
| 708 f = file(cacheFullPath, "wb") |
| 709 f.write(value) |
| 710 f.close() |
| 711 |
| 712 def delete(self, key): |
| 713 cacheFullPath = os.path.join(self.cache, self.safe(key)) |
| 714 if os.path.exists(cacheFullPath): |
| 715 os.remove(cacheFullPath) |
| 716 |
| 717 class Credentials(object): |
| 718 def __init__(self): |
| 719 self.credentials = [] |
| 720 |
| 721 def add(self, name, password, domain=""): |
| 722 self.credentials.append((domain.lower(), name, password)) |
| 723 |
| 724 def clear(self): |
| 725 self.credentials = [] |
| 726 |
| 727 def iter(self, domain): |
| 728 for (cdomain, name, password) in self.credentials: |
| 729 if cdomain == "" or domain == cdomain: |
| 730 yield (name, password) |
| 731 |
| 732 class KeyCerts(Credentials): |
| 733 """Identical to Credentials except that |
| 734 name/password are mapped to key/cert.""" |
| 735 pass |
| 736 |
| 737 class AllHosts(object): |
| 738 pass |
| 739 |
| 740 class ProxyInfo(object): |
| 741 """Collect information required to use a proxy.""" |
| 742 bypass_hosts = () |
| 743 |
| 744 def __init__(self, proxy_type, proxy_host, proxy_port, |
| 745 proxy_rdns=None, proxy_user=None, proxy_pass=None): |
| 746 """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX |
| 747 constants. For example: |
| 748 |
| 749 p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, |
| 750 proxy_host='localhost', proxy_port=8000) |
| 751 """ |
| 752 self.proxy_type = proxy_type |
| 753 self.proxy_host = proxy_host |
| 754 self.proxy_port = proxy_port |
| 755 self.proxy_rdns = proxy_rdns |
| 756 self.proxy_user = proxy_user |
| 757 self.proxy_pass = proxy_pass |
| 758 |
| 759 def astuple(self): |
| 760 return (self.proxy_type, self.proxy_host, self.proxy_port, |
| 761 self.proxy_rdns, self.proxy_user, self.proxy_pass) |
| 762 |
| 763 def isgood(self): |
| 764 return (self.proxy_host != None) and (self.proxy_port != None) |
| 765 |
| 766 @classmethod |
| 767 def from_environment(cls, method='http'): |
| 768 """ |
| 769 Read proxy info from the environment variables. |
| 770 """ |
| 771 if method not in ['http', 'https']: |
| 772 return |
| 773 |
| 774 env_var = method + '_proxy' |
| 775 url = os.environ.get(env_var, os.environ.get(env_var.upper())) |
| 776 if not url: |
| 777 return |
| 778 pi = cls.from_url(url, method) |
| 779 |
| 780 no_proxy = os.environ.get('no_proxy', os.environ.get('NO_PROXY', '')) |
| 781 bypass_hosts = [] |
| 782 if no_proxy: |
| 783 bypass_hosts = no_proxy.split(',') |
| 784 # special case, no_proxy=* means all hosts bypassed |
| 785 if no_proxy == '*': |
| 786 bypass_hosts = AllHosts |
| 787 |
| 788 pi.bypass_hosts = bypass_hosts |
| 789 return pi |
| 790 |
| 791 @classmethod |
| 792 def from_url(cls, url, method='http'): |
| 793 """ |
| 794 Construct a ProxyInfo from a URL (such as http_proxy env var) |
| 795 """ |
| 796 url = urlparse.urlparse(url) |
| 797 username = None |
| 798 password = None |
| 799 port = None |
| 800 if '@' in url[1]: |
| 801 ident, host_port = url[1].split('@', 1) |
| 802 if ':' in ident: |
| 803 username, password = ident.split(':', 1) |
| 804 else: |
| 805 password = ident |
| 806 else: |
| 807 host_port = url[1] |
| 808 if ':' in host_port: |
| 809 host, port = host_port.split(':', 1) |
| 810 else: |
| 811 host = host_port |
| 812 |
| 813 if port: |
| 814 port = int(port) |
| 815 else: |
| 816 port = dict(https=443, http=80)[method] |
| 817 |
| 818 proxy_type = 3 # socks.PROXY_TYPE_HTTP |
| 819 return cls( |
| 820 proxy_type = proxy_type, |
| 821 proxy_host = host, |
| 822 proxy_port = port, |
| 823 proxy_user = username or None, |
| 824 proxy_pass = password or None, |
| 825 ) |
| 826 |
| 827 def applies_to(self, hostname): |
| 828 return not self.bypass_host(hostname) |
| 829 |
| 830 def bypass_host(self, hostname): |
| 831 """Has this host been excluded from the proxy config""" |
| 832 if self.bypass_hosts is AllHosts: |
| 833 return True |
| 834 |
| 835 bypass = False |
| 836 for domain in self.bypass_hosts: |
| 837 if hostname.endswith(domain): |
| 838 bypass = True |
| 839 |
| 840 return bypass |
| 841 |
| 842 |
| 843 class HTTPConnectionWithTimeout(httplib.HTTPConnection): |
| 844 """ |
| 845 HTTPConnection subclass that supports timeouts |
| 846 |
| 847 All timeouts are in seconds. If None is passed for timeout then |
| 848 Python's default timeout for sockets will be used. See for example |
| 849 the docs of socket.setdefaulttimeout(): |
| 850 http://docs.python.org/library/socket.html#socket.setdefaulttimeout |
| 851 """ |
| 852 |
| 853 def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=No
ne): |
| 854 httplib.HTTPConnection.__init__(self, host, port, strict) |
| 855 self.timeout = timeout |
| 856 self.proxy_info = proxy_info |
| 857 |
| 858 def connect(self): |
| 859 """Connect to the host and port specified in __init__.""" |
| 860 # Mostly verbatim from httplib.py. |
| 861 if self.proxy_info and socks is None: |
| 862 raise ProxiesUnavailableError( |
| 863 'Proxy support missing but proxy use was requested!') |
| 864 msg = "getaddrinfo returns an empty list" |
| 865 if self.proxy_info and self.proxy_info.isgood(): |
| 866 use_proxy = True |
| 867 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pa
ss = self.proxy_info.astuple() |
| 868 else: |
| 869 use_proxy = False |
| 870 if use_proxy and proxy_rdns: |
| 871 host = proxy_host |
| 872 port = proxy_port |
| 873 else: |
| 874 host = self.host |
| 875 port = self.port |
| 876 |
| 877 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): |
| 878 af, socktype, proto, canonname, sa = res |
| 879 try: |
| 880 if use_proxy: |
| 881 self.sock = socks.socksocket(af, socktype, proto) |
| 882 self.sock.setproxy(proxy_type, proxy_host, proxy_port, proxy
_rdns, proxy_user, proxy_pass) |
| 883 else: |
| 884 self.sock = socket.socket(af, socktype, proto) |
| 885 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
1) |
| 886 # Different from httplib: support timeouts. |
| 887 if has_timeout(self.timeout): |
| 888 self.sock.settimeout(self.timeout) |
| 889 # End of difference from httplib. |
| 890 if self.debuglevel > 0: |
| 891 print "connect: (%s, %s) ************" % (self.host, self.po
rt) |
| 892 if use_proxy: |
| 893 print "proxy: %s ************" % str((proxy_host, proxy_
port, proxy_rdns, proxy_user, proxy_pass)) |
| 894 |
| 895 self.sock.connect((self.host, self.port) + sa[2:]) |
| 896 except socket.error, msg: |
| 897 if self.debuglevel > 0: |
| 898 print "connect fail: (%s, %s)" % (self.host, self.port) |
| 899 if use_proxy: |
| 900 print "proxy: %s" % str((proxy_host, proxy_port, proxy_r
dns, proxy_user, proxy_pass)) |
| 901 if self.sock: |
| 902 self.sock.close() |
| 903 self.sock = None |
| 904 continue |
| 905 break |
| 906 if not self.sock: |
| 907 raise socket.error, msg |
| 908 |
| 909 class HTTPSConnectionWithTimeout(httplib.HTTPSConnection): |
| 910 """ |
| 911 This class allows communication via SSL. |
| 912 |
| 913 All timeouts are in seconds. If None is passed for timeout then |
| 914 Python's default timeout for sockets will be used. See for example |
| 915 the docs of socket.setdefaulttimeout(): |
| 916 http://docs.python.org/library/socket.html#socket.setdefaulttimeout |
| 917 """ |
| 918 def __init__(self, host, port=None, key_file=None, cert_file=None, |
| 919 strict=None, timeout=None, proxy_info=None, |
| 920 ca_certs=None, disable_ssl_certificate_validation=False): |
| 921 httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_fil
e, |
| 922 cert_file=cert_file, strict=strict) |
| 923 self.timeout = timeout |
| 924 self.proxy_info = proxy_info |
| 925 if ca_certs is None: |
| 926 ca_certs = CA_CERTS |
| 927 self.ca_certs = ca_certs |
| 928 self.disable_ssl_certificate_validation = \ |
| 929 disable_ssl_certificate_validation |
| 930 |
| 931 # The following two methods were adapted from https_wrapper.py, released |
| 932 # with the Google Appengine SDK at |
| 933 # http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google
/appengine/tools/https_wrapper.py |
| 934 # under the following license: |
| 935 # |
| 936 # Copyright 2007 Google Inc. |
| 937 # |
| 938 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 939 # you may not use this file except in compliance with the License. |
| 940 # You may obtain a copy of the License at |
| 941 # |
| 942 # http://www.apache.org/licenses/LICENSE-2.0 |
| 943 # |
| 944 # Unless required by applicable law or agreed to in writing, software |
| 945 # distributed under the License is distributed on an "AS IS" BASIS, |
| 946 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 947 # See the License for the specific language governing permissions and |
| 948 # limitations under the License. |
| 949 # |
| 950 |
| 951 def _GetValidHostsForCert(self, cert): |
| 952 """Returns a list of valid host globs for an SSL certificate. |
| 953 |
| 954 Args: |
| 955 cert: A dictionary representing an SSL certificate. |
| 956 Returns: |
| 957 list: A list of valid host globs. |
| 958 """ |
| 959 if 'subjectAltName' in cert: |
| 960 return [x[1] for x in cert['subjectAltName'] |
| 961 if x[0].lower() == 'dns'] |
| 962 else: |
| 963 return [x[0][1] for x in cert['subject'] |
| 964 if x[0][0].lower() == 'commonname'] |
| 965 |
| 966 def _ValidateCertificateHostname(self, cert, hostname): |
| 967 """Validates that a given hostname is valid for an SSL certificate. |
| 968 |
| 969 Args: |
| 970 cert: A dictionary representing an SSL certificate. |
| 971 hostname: The hostname to test. |
| 972 Returns: |
| 973 bool: Whether or not the hostname is valid for this certificate. |
| 974 """ |
| 975 hosts = self._GetValidHostsForCert(cert) |
| 976 for host in hosts: |
| 977 host_re = host.replace('.', '\.').replace('*', '[^.]*') |
| 978 if re.search('^%s$' % (host_re,), hostname, re.I): |
| 979 return True |
| 980 return False |
| 981 |
| 982 def connect(self): |
| 983 "Connect to a host on a given (SSL) port." |
| 984 |
| 985 msg = "getaddrinfo returns an empty list" |
| 986 if self.proxy_info and self.proxy_info.isgood(): |
| 987 use_proxy = True |
| 988 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pa
ss = self.proxy_info.astuple() |
| 989 else: |
| 990 use_proxy = False |
| 991 if use_proxy and proxy_rdns: |
| 992 host = proxy_host |
| 993 port = proxy_port |
| 994 else: |
| 995 host = self.host |
| 996 port = self.port |
| 997 |
| 998 for family, socktype, proto, canonname, sockaddr in socket.getaddrinfo( |
| 999 host, port, 0, socket.SOCK_STREAM): |
| 1000 try: |
| 1001 if use_proxy: |
| 1002 sock = socks.socksocket(family, socktype, proto) |
| 1003 |
| 1004 sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns
, proxy_user, proxy_pass) |
| 1005 else: |
| 1006 sock = socket.socket(family, socktype, proto) |
| 1007 sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) |
| 1008 |
| 1009 if has_timeout(self.timeout): |
| 1010 sock.settimeout(self.timeout) |
| 1011 sock.connect((self.host, self.port)) |
| 1012 self.sock =_ssl_wrap_socket( |
| 1013 sock, self.key_file, self.cert_file, |
| 1014 self.disable_ssl_certificate_validation, self.ca_certs) |
| 1015 if self.debuglevel > 0: |
| 1016 print "connect: (%s, %s)" % (self.host, self.port) |
| 1017 if use_proxy: |
| 1018 print "proxy: %s" % str((proxy_host, proxy_port, proxy_r
dns, proxy_user, proxy_pass)) |
| 1019 if not self.disable_ssl_certificate_validation: |
| 1020 cert = self.sock.getpeercert() |
| 1021 hostname = self.host.split(':', 0)[0] |
| 1022 if not self._ValidateCertificateHostname(cert, hostname): |
| 1023 raise CertificateHostnameMismatch( |
| 1024 'Server presented certificate that does not match ' |
| 1025 'host %s: %s' % (hostname, cert), hostname, cert) |
| 1026 except ssl_SSLError, e: |
| 1027 if sock: |
| 1028 sock.close() |
| 1029 if self.sock: |
| 1030 self.sock.close() |
| 1031 self.sock = None |
| 1032 # Unfortunately the ssl module doesn't seem to provide any way |
| 1033 # to get at more detailed error information, in particular |
| 1034 # whether the error is due to certificate validation or |
| 1035 # something else (such as SSL protocol mismatch). |
| 1036 if e.errno == ssl.SSL_ERROR_SSL: |
| 1037 raise SSLHandshakeError(e) |
| 1038 else: |
| 1039 raise |
| 1040 except (socket.timeout, socket.gaierror): |
| 1041 raise |
| 1042 except socket.error, msg: |
| 1043 if self.debuglevel > 0: |
| 1044 print "connect fail: (%s, %s)" % (self.host, self.port) |
| 1045 if use_proxy: |
| 1046 print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdn
s, proxy_user, proxy_pass)) |
| 1047 if self.sock: |
| 1048 self.sock.close() |
| 1049 self.sock = None |
| 1050 continue |
| 1051 break |
| 1052 if not self.sock: |
| 1053 raise socket.error, msg |
| 1054 |
| 1055 SCHEME_TO_CONNECTION = { |
| 1056 'http': HTTPConnectionWithTimeout, |
| 1057 'https': HTTPSConnectionWithTimeout |
| 1058 } |
| 1059 |
| 1060 # Use a different connection object for Google App Engine |
| 1061 try: |
| 1062 from google.appengine.api import apiproxy_stub_map |
| 1063 if apiproxy_stub_map.apiproxy.GetStub('urlfetch') is None: |
| 1064 raise ImportError # Bail out; we're not actually running on App Engine. |
| 1065 from google.appengine.api.urlfetch import fetch |
| 1066 from google.appengine.api.urlfetch import InvalidURLError |
| 1067 from google.appengine.api.urlfetch import DownloadError |
| 1068 from google.appengine.api.urlfetch import ResponseTooLargeError |
| 1069 from google.appengine.api.urlfetch import SSLCertificateError |
| 1070 |
| 1071 |
| 1072 class ResponseDict(dict): |
| 1073 """Is a dictionary that also has a read() method, so |
| 1074 that it can pass itself off as an httlib.HTTPResponse().""" |
| 1075 def read(self): |
| 1076 pass |
| 1077 |
| 1078 |
| 1079 class AppEngineHttpConnection(object): |
| 1080 """Emulates an httplib.HTTPConnection object, but actually uses the Google |
| 1081 App Engine urlfetch library. This allows the timeout to be properly used on |
| 1082 Google App Engine, and avoids using httplib, which on Google App Engine is |
| 1083 just another wrapper around urlfetch. |
| 1084 """ |
| 1085 def __init__(self, host, port=None, key_file=None, cert_file=None, |
| 1086 strict=None, timeout=None, proxy_info=None, ca_certs=None, |
| 1087 disable_ssl_certificate_validation=False): |
| 1088 self.host = host |
| 1089 self.port = port |
| 1090 self.timeout = timeout |
| 1091 if key_file or cert_file or proxy_info or ca_certs: |
| 1092 raise NotSupportedOnThisPlatform() |
| 1093 self.response = None |
| 1094 self.scheme = 'http' |
| 1095 self.validate_certificate = not disable_ssl_certificate_validation |
| 1096 self.sock = True |
| 1097 |
| 1098 def request(self, method, url, body, headers): |
| 1099 # Calculate the absolute URI, which fetch requires |
| 1100 netloc = self.host |
| 1101 if self.port: |
| 1102 netloc = '%s:%s' % (self.host, self.port) |
| 1103 absolute_uri = '%s://%s%s' % (self.scheme, netloc, url) |
| 1104 try: |
| 1105 response = fetch(absolute_uri, payload=body, method=method, |
| 1106 headers=headers, allow_truncated=False, follow_redirects=False, |
| 1107 deadline=self.timeout, |
| 1108 validate_certificate=self.validate_certificate) |
| 1109 self.response = ResponseDict(response.headers) |
| 1110 self.response['status'] = str(response.status_code) |
| 1111 self.response['reason'] = httplib.responses.get(response.status_code, 'O
k') |
| 1112 self.response.status = response.status_code |
| 1113 setattr(self.response, 'read', lambda : response.content) |
| 1114 |
| 1115 # Make sure the exceptions raised match the exceptions expected. |
| 1116 except InvalidURLError: |
| 1117 raise socket.gaierror('') |
| 1118 except (DownloadError, ResponseTooLargeError, SSLCertificateError): |
| 1119 raise httplib.HTTPException() |
| 1120 |
| 1121 def getresponse(self): |
| 1122 if self.response: |
| 1123 return self.response |
| 1124 else: |
| 1125 raise httplib.HTTPException() |
| 1126 |
| 1127 def set_debuglevel(self, level): |
| 1128 pass |
| 1129 |
| 1130 def connect(self): |
| 1131 pass |
| 1132 |
| 1133 def close(self): |
| 1134 pass |
| 1135 |
| 1136 |
| 1137 class AppEngineHttpsConnection(AppEngineHttpConnection): |
| 1138 """Same as AppEngineHttpConnection, but for HTTPS URIs.""" |
| 1139 def __init__(self, host, port=None, key_file=None, cert_file=None, |
| 1140 strict=None, timeout=None, proxy_info=None, ca_certs=None, |
| 1141 disable_ssl_certificate_validation=False): |
| 1142 AppEngineHttpConnection.__init__(self, host, port, key_file, cert_file, |
| 1143 strict, timeout, proxy_info, ca_certs, disable_ssl_certificate_validat
ion) |
| 1144 self.scheme = 'https' |
| 1145 |
| 1146 # Update the connection classes to use the Googel App Engine specific ones. |
| 1147 SCHEME_TO_CONNECTION = { |
| 1148 'http': AppEngineHttpConnection, |
| 1149 'https': AppEngineHttpsConnection |
| 1150 } |
| 1151 |
| 1152 except ImportError: |
| 1153 pass |
| 1154 |
| 1155 |
| 1156 class Http(object): |
| 1157 """An HTTP client that handles: |
| 1158 - all methods |
| 1159 - caching |
| 1160 - ETags |
| 1161 - compression, |
| 1162 - HTTPS |
| 1163 - Basic |
| 1164 - Digest |
| 1165 - WSSE |
| 1166 |
| 1167 and more. |
| 1168 """ |
| 1169 def __init__(self, cache=None, timeout=None, |
| 1170 proxy_info=ProxyInfo.from_environment, |
| 1171 ca_certs=None, disable_ssl_certificate_validation=False): |
| 1172 """If 'cache' is a string then it is used as a directory name for |
| 1173 a disk cache. Otherwise it must be an object that supports the |
| 1174 same interface as FileCache. |
| 1175 |
| 1176 All timeouts are in seconds. If None is passed for timeout |
| 1177 then Python's default timeout for sockets will be used. See |
| 1178 for example the docs of socket.setdefaulttimeout(): |
| 1179 http://docs.python.org/library/socket.html#socket.setdefaulttimeout |
| 1180 |
| 1181 `proxy_info` may be: |
| 1182 - a callable that takes the http scheme ('http' or 'https') and |
| 1183 returns a ProxyInfo instance per request. By default, uses |
| 1184 ProxyInfo.from_environment. |
| 1185 - a ProxyInfo instance (static proxy config). |
| 1186 - None (proxy disabled). |
| 1187 |
| 1188 ca_certs is the path of a file containing root CA certificates for SSL |
| 1189 server certificate validation. By default, a CA cert file bundled with |
| 1190 httplib2 is used. |
| 1191 |
| 1192 If disable_ssl_certificate_validation is true, SSL cert validation will |
| 1193 not be performed. |
| 1194 """ |
| 1195 self.proxy_info = proxy_info |
| 1196 self.ca_certs = ca_certs |
| 1197 self.disable_ssl_certificate_validation = \ |
| 1198 disable_ssl_certificate_validation |
| 1199 |
| 1200 # Map domain name to an httplib connection |
| 1201 self.connections = {} |
| 1202 # The location of the cache, for now a directory |
| 1203 # where cached responses are held. |
| 1204 if cache and isinstance(cache, basestring): |
| 1205 self.cache = FileCache(cache) |
| 1206 else: |
| 1207 self.cache = cache |
| 1208 |
| 1209 # Name/password |
| 1210 self.credentials = Credentials() |
| 1211 |
| 1212 # Key/cert |
| 1213 self.certificates = KeyCerts() |
| 1214 |
| 1215 # authorization objects |
| 1216 self.authorizations = [] |
| 1217 |
| 1218 # If set to False then no redirects are followed, even safe ones. |
| 1219 self.follow_redirects = True |
| 1220 |
| 1221 # Which HTTP methods do we apply optimistic concurrency to, i.e. |
| 1222 # which methods get an "if-match:" etag header added to them. |
| 1223 self.optimistic_concurrency_methods = ["PUT", "PATCH"] |
| 1224 |
| 1225 # If 'follow_redirects' is True, and this is set to True then |
| 1226 # all redirecs are followed, including unsafe ones. |
| 1227 self.follow_all_redirects = False |
| 1228 |
| 1229 self.ignore_etag = False |
| 1230 |
| 1231 self.force_exception_to_status_code = False |
| 1232 |
| 1233 self.timeout = timeout |
| 1234 |
| 1235 # Keep Authorization: headers on a redirect. |
| 1236 self.forward_authorization_headers = False |
| 1237 |
| 1238 def _auth_from_challenge(self, host, request_uri, headers, response, content
): |
| 1239 """A generator that creates Authorization objects |
| 1240 that can be applied to requests. |
| 1241 """ |
| 1242 challenges = _parse_www_authenticate(response, 'www-authenticate') |
| 1243 for cred in self.credentials.iter(host): |
| 1244 for scheme in AUTH_SCHEME_ORDER: |
| 1245 if challenges.has_key(scheme): |
| 1246 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, h
eaders, response, content, self) |
| 1247 |
| 1248 def add_credentials(self, name, password, domain=""): |
| 1249 """Add a name and password that will be used |
| 1250 any time a request requires authentication.""" |
| 1251 self.credentials.add(name, password, domain) |
| 1252 |
| 1253 def add_certificate(self, key, cert, domain): |
| 1254 """Add a key and cert that will be used |
| 1255 any time a request requires authentication.""" |
| 1256 self.certificates.add(key, cert, domain) |
| 1257 |
| 1258 def clear_credentials(self): |
| 1259 """Remove all the names and passwords |
| 1260 that are used for authentication""" |
| 1261 self.credentials.clear() |
| 1262 self.authorizations = [] |
| 1263 |
| 1264 def _conn_request(self, conn, request_uri, method, body, headers): |
| 1265 for i in range(RETRIES): |
| 1266 try: |
| 1267 if conn.sock is None: |
| 1268 conn.connect() |
| 1269 conn.request(method, request_uri, body, headers) |
| 1270 except socket.timeout: |
| 1271 raise |
| 1272 except socket.gaierror: |
| 1273 conn.close() |
| 1274 raise ServerNotFoundError("Unable to find the server at %s" % co
nn.host) |
| 1275 except ssl_SSLError: |
| 1276 conn.close() |
| 1277 raise |
| 1278 except socket.error, e: |
| 1279 err = 0 |
| 1280 if hasattr(e, 'args'): |
| 1281 err = getattr(e, 'args')[0] |
| 1282 else: |
| 1283 err = e.errno |
| 1284 if err == errno.ECONNREFUSED: # Connection refused |
| 1285 raise |
| 1286 except httplib.HTTPException: |
| 1287 # Just because the server closed the connection doesn't apparent
ly mean |
| 1288 # that the server didn't send a response. |
| 1289 if conn.sock is None: |
| 1290 if i < RETRIES-1: |
| 1291 conn.close() |
| 1292 conn.connect() |
| 1293 continue |
| 1294 else: |
| 1295 conn.close() |
| 1296 raise |
| 1297 if i < RETRIES-1: |
| 1298 conn.close() |
| 1299 conn.connect() |
| 1300 continue |
| 1301 try: |
| 1302 response = conn.getresponse() |
| 1303 except (socket.error, httplib.HTTPException): |
| 1304 if i < RETRIES-1: |
| 1305 conn.close() |
| 1306 conn.connect() |
| 1307 continue |
| 1308 else: |
| 1309 raise |
| 1310 else: |
| 1311 content = "" |
| 1312 if method == "HEAD": |
| 1313 conn.close() |
| 1314 else: |
| 1315 content = response.read() |
| 1316 response = Response(response) |
| 1317 if method != "HEAD": |
| 1318 content = _decompressContent(response, content) |
| 1319 break |
| 1320 return (response, content) |
| 1321 |
| 1322 |
| 1323 def _request(self, conn, host, absolute_uri, request_uri, method, body, head
ers, redirections, cachekey): |
| 1324 """Do the actual request using the connection object |
| 1325 and also follow one level of redirects if necessary""" |
| 1326 |
| 1327 auths = [(auth.depth(request_uri), auth) for auth in self.authorizations
if auth.inscope(host, request_uri)] |
| 1328 auth = auths and sorted(auths)[0][1] or None |
| 1329 if auth: |
| 1330 auth.request(method, request_uri, headers, body) |
| 1331 |
| 1332 (response, content) = self._conn_request(conn, request_uri, method, body
, headers) |
| 1333 |
| 1334 if auth: |
| 1335 if auth.response(response, body): |
| 1336 auth.request(method, request_uri, headers, body) |
| 1337 (response, content) = self._conn_request(conn, request_uri, meth
od, body, headers ) |
| 1338 response._stale_digest = 1 |
| 1339 |
| 1340 if response.status == 401: |
| 1341 for authorization in self._auth_from_challenge(host, request_uri, he
aders, response, content): |
| 1342 authorization.request(method, request_uri, headers, body) |
| 1343 (response, content) = self._conn_request(conn, request_uri, meth
od, body, headers, ) |
| 1344 if response.status != 401: |
| 1345 self.authorizations.append(authorization) |
| 1346 authorization.response(response, body) |
| 1347 break |
| 1348 |
| 1349 if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response
.status == 303): |
| 1350 if self.follow_redirects and response.status in [300, 301, 302, 303,
307]: |
| 1351 # Pick out the location header and basically start from the begi
nning |
| 1352 # remembering first to strip the ETag header and decrement our '
depth' |
| 1353 if redirections: |
| 1354 if not response.has_key('location') and response.status != 3
00: |
| 1355 raise RedirectMissingLocation( _("Redirected but the res
ponse is missing a Location: header."), response, content) |
| 1356 # Fix-up relative redirects (which violate an RFC 2616 MUST) |
| 1357 if response.has_key('location'): |
| 1358 location = response['location'] |
| 1359 (scheme, authority, path, query, fragment) = parse_uri(l
ocation) |
| 1360 if authority == None: |
| 1361 response['location'] = urlparse.urljoin(absolute_uri
, location) |
| 1362 if response.status == 301 and method in ["GET", "HEAD"]: |
| 1363 response['-x-permanent-redirect-url'] = response['locati
on'] |
| 1364 if not response.has_key('content-location'): |
| 1365 response['content-location'] = absolute_uri |
| 1366 _updateCache(headers, response, content, self.cache, cac
hekey) |
| 1367 if headers.has_key('if-none-match'): |
| 1368 del headers['if-none-match'] |
| 1369 if headers.has_key('if-modified-since'): |
| 1370 del headers['if-modified-since'] |
| 1371 if 'authorization' in headers and not self.forward_authoriza
tion_headers: |
| 1372 del headers['authorization'] |
| 1373 if response.has_key('location'): |
| 1374 location = response['location'] |
| 1375 old_response = copy.deepcopy(response) |
| 1376 if not old_response.has_key('content-location'): |
| 1377 old_response['content-location'] = absolute_uri |
| 1378 redirect_method = method |
| 1379 if response.status in [302, 303]: |
| 1380 redirect_method = "GET" |
| 1381 body = None |
| 1382 (response, content) = self.request(location, redirect_me
thod, body=body, headers = headers, redirections = redirections - 1) |
| 1383 response.previous = old_response |
| 1384 else: |
| 1385 raise RedirectLimit("Redirected more times than rediection_l
imit allows.", response, content) |
| 1386 elif response.status in [200, 203] and method in ["GET", "HEAD"]: |
| 1387 # Don't cache 206's since we aren't going to handle byte range r
equests |
| 1388 if not response.has_key('content-location'): |
| 1389 response['content-location'] = absolute_uri |
| 1390 _updateCache(headers, response, content, self.cache, cachekey) |
| 1391 |
| 1392 return (response, content) |
| 1393 |
| 1394 def _normalize_headers(self, headers): |
| 1395 return _normalize_headers(headers) |
| 1396 |
| 1397 # Need to catch and rebrand some exceptions |
| 1398 # Then need to optionally turn all exceptions into status codes |
| 1399 # including all socket.* and httplib.* exceptions. |
| 1400 |
| 1401 |
| 1402 def request(self, uri, method="GET", body=None, headers=None, redirections=D
EFAULT_MAX_REDIRECTS, connection_type=None): |
| 1403 """ Performs a single HTTP request. |
| 1404 The 'uri' is the URI of the HTTP resource and can begin |
| 1405 with either 'http' or 'https'. The value of 'uri' must be an absolute URI. |
| 1406 |
| 1407 The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc. |
| 1408 There is no restriction on the methods allowed. |
| 1409 |
| 1410 The 'body' is the entity body to be sent with the request. It is a string |
| 1411 object. |
| 1412 |
| 1413 Any extra headers that are to be sent with the request should be provided in the |
| 1414 'headers' dictionary. |
| 1415 |
| 1416 The maximum number of redirect to follow before raising an |
| 1417 exception is 'redirections. The default is 5. |
| 1418 |
| 1419 The return value is a tuple of (response, content), the first |
| 1420 being and instance of the 'Response' class, the second being |
| 1421 a string that contains the response entity body. |
| 1422 """ |
| 1423 try: |
| 1424 if headers is None: |
| 1425 headers = {} |
| 1426 else: |
| 1427 headers = self._normalize_headers(headers) |
| 1428 |
| 1429 if not headers.has_key('user-agent'): |
| 1430 headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version_
_ |
| 1431 |
| 1432 uri = iri2uri(uri) |
| 1433 |
| 1434 (scheme, authority, request_uri, defrag_uri) = urlnorm(uri) |
| 1435 domain_port = authority.split(":")[0:2] |
| 1436 if len(domain_port) == 2 and domain_port[1] == '443' and scheme == '
http': |
| 1437 scheme = 'https' |
| 1438 authority = domain_port[0] |
| 1439 |
| 1440 proxy_info = self._get_proxy_info(scheme, authority) |
| 1441 |
| 1442 conn_key = scheme+":"+authority |
| 1443 if conn_key in self.connections: |
| 1444 conn = self.connections[conn_key] |
| 1445 else: |
| 1446 if not connection_type: |
| 1447 connection_type = SCHEME_TO_CONNECTION[scheme] |
| 1448 certs = list(self.certificates.iter(authority)) |
| 1449 if scheme == 'https': |
| 1450 if certs: |
| 1451 conn = self.connections[conn_key] = connection_type( |
| 1452 authority, key_file=certs[0][0], |
| 1453 cert_file=certs[0][1], timeout=self.timeout, |
| 1454 proxy_info=proxy_info, |
| 1455 ca_certs=self.ca_certs, |
| 1456 disable_ssl_certificate_validation= |
| 1457 self.disable_ssl_certificate_validation) |
| 1458 else: |
| 1459 conn = self.connections[conn_key] = connection_type( |
| 1460 authority, timeout=self.timeout, |
| 1461 proxy_info=proxy_info, |
| 1462 ca_certs=self.ca_certs, |
| 1463 disable_ssl_certificate_validation= |
| 1464 self.disable_ssl_certificate_validation) |
| 1465 else: |
| 1466 conn = self.connections[conn_key] = connection_type( |
| 1467 authority, timeout=self.timeout, |
| 1468 proxy_info=proxy_info) |
| 1469 conn.set_debuglevel(debuglevel) |
| 1470 |
| 1471 if 'range' not in headers and 'accept-encoding' not in headers: |
| 1472 headers['accept-encoding'] = 'gzip, deflate' |
| 1473 |
| 1474 info = email.Message.Message() |
| 1475 cached_value = None |
| 1476 if self.cache: |
| 1477 cachekey = defrag_uri |
| 1478 cached_value = self.cache.get(cachekey) |
| 1479 if cached_value: |
| 1480 # info = email.message_from_string(cached_value) |
| 1481 # |
| 1482 # Need to replace the line above with the kludge below |
| 1483 # to fix the non-existent bug not fixed in this |
| 1484 # bug report: http://mail.python.org/pipermail/python-bugs-l
ist/2005-September/030289.html |
| 1485 try: |
| 1486 info, content = cached_value.split('\r\n\r\n', 1) |
| 1487 feedparser = email.FeedParser.FeedParser() |
| 1488 feedparser.feed(info) |
| 1489 info = feedparser.close() |
| 1490 feedparser._parse = None |
| 1491 except (IndexError, ValueError): |
| 1492 self.cache.delete(cachekey) |
| 1493 cachekey = None |
| 1494 cached_value = None |
| 1495 else: |
| 1496 cachekey = None |
| 1497 |
| 1498 if method in self.optimistic_concurrency_methods and self.cache and
info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers: |
| 1499 # http://www.w3.org/1999/04/Editing/ |
| 1500 headers['if-match'] = info['etag'] |
| 1501 |
| 1502 if method not in ["GET", "HEAD"] and self.cache and cachekey: |
| 1503 # RFC 2616 Section 13.10 |
| 1504 self.cache.delete(cachekey) |
| 1505 |
| 1506 # Check the vary header in the cache to see if this request |
| 1507 # matches what varies in the cache. |
| 1508 if method in ['GET', 'HEAD'] and 'vary' in info: |
| 1509 vary = info['vary'] |
| 1510 vary_headers = vary.lower().replace(' ', '').split(',') |
| 1511 for header in vary_headers: |
| 1512 key = '-varied-%s' % header |
| 1513 value = info[key] |
| 1514 if headers.get(header, None) != value: |
| 1515 cached_value = None |
| 1516 break |
| 1517 |
| 1518 if cached_value and method in ["GET", "HEAD"] and self.cache and 'ra
nge' not in headers: |
| 1519 if info.has_key('-x-permanent-redirect-url'): |
| 1520 # Should cached permanent redirects be counted in our redire
ction count? For now, yes. |
| 1521 if redirections <= 0: |
| 1522 raise RedirectLimit("Redirected more times than rediection
_limit allows.", {}, "") |
| 1523 (response, new_content) = self.request(info['-x-permanent-re
direct-url'], "GET", headers = headers, redirections = redirections - 1) |
| 1524 response.previous = Response(info) |
| 1525 response.previous.fromcache = True |
| 1526 else: |
| 1527 # Determine our course of action: |
| 1528 # Is the cached entry fresh or stale? |
| 1529 # Has the client requested a non-cached response? |
| 1530 # |
| 1531 # There seems to be three possible answers: |
| 1532 # 1. [FRESH] Return the cache entry w/o doing a GET |
| 1533 # 2. [STALE] Do the GET (but add in cache validators if avai
lable) |
| 1534 # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-
Control: no-cache) on the request |
| 1535 entry_disposition = _entry_disposition(info, headers) |
| 1536 |
| 1537 if entry_disposition == "FRESH": |
| 1538 if not cached_value: |
| 1539 info['status'] = '504' |
| 1540 content = "" |
| 1541 response = Response(info) |
| 1542 if cached_value: |
| 1543 response.fromcache = True |
| 1544 return (response, content) |
| 1545 |
| 1546 if entry_disposition == "STALE": |
| 1547 if info.has_key('etag') and not self.ignore_etag and not
'if-none-match' in headers: |
| 1548 headers['if-none-match'] = info['etag'] |
| 1549 if info.has_key('last-modified') and not 'last-modified'
in headers: |
| 1550 headers['if-modified-since'] = info['last-modified'] |
| 1551 elif entry_disposition == "TRANSPARENT": |
| 1552 pass |
| 1553 |
| 1554 (response, new_content) = self._request(conn, authority, uri
, request_uri, method, body, headers, redirections, cachekey) |
| 1555 |
| 1556 if response.status == 304 and method == "GET": |
| 1557 # Rewrite the cache entry with the new end-to-end headers |
| 1558 # Take all headers that are in response |
| 1559 # and overwrite their values in info. |
| 1560 # unless they are hop-by-hop, or are listed in the connectio
n header. |
| 1561 |
| 1562 for key in _get_end2end_headers(response): |
| 1563 info[key] = response[key] |
| 1564 merged_response = Response(info) |
| 1565 if hasattr(response, "_stale_digest"): |
| 1566 merged_response._stale_digest = response._stale_digest |
| 1567 _updateCache(headers, merged_response, content, self.cache,
cachekey) |
| 1568 response = merged_response |
| 1569 response.status = 200 |
| 1570 response.fromcache = True |
| 1571 |
| 1572 elif response.status == 200: |
| 1573 content = new_content |
| 1574 else: |
| 1575 self.cache.delete(cachekey) |
| 1576 content = new_content |
| 1577 else: |
| 1578 cc = _parse_cache_control(headers) |
| 1579 if cc.has_key('only-if-cached'): |
| 1580 info['status'] = '504' |
| 1581 response = Response(info) |
| 1582 content = "" |
| 1583 else: |
| 1584 (response, content) = self._request(conn, authority, uri, re
quest_uri, method, body, headers, redirections, cachekey) |
| 1585 except Exception, e: |
| 1586 if self.force_exception_to_status_code: |
| 1587 if isinstance(e, HttpLib2ErrorWithResponse): |
| 1588 response = e.response |
| 1589 content = e.content |
| 1590 response.status = 500 |
| 1591 response.reason = str(e) |
| 1592 elif isinstance(e, socket.timeout): |
| 1593 content = "Request Timeout" |
| 1594 response = Response( { |
| 1595 "content-type": "text/plain", |
| 1596 "status": "408", |
| 1597 "content-length": len(content) |
| 1598 }) |
| 1599 response.reason = "Request Timeout" |
| 1600 else: |
| 1601 content = str(e) |
| 1602 response = Response( { |
| 1603 "content-type": "text/plain", |
| 1604 "status": "400", |
| 1605 "content-length": len(content) |
| 1606 }) |
| 1607 response.reason = "Bad Request" |
| 1608 else: |
| 1609 raise |
| 1610 |
| 1611 |
| 1612 return (response, content) |
| 1613 |
| 1614 def _get_proxy_info(self, scheme, authority): |
| 1615 """Return a ProxyInfo instance (or None) based on the scheme |
| 1616 and authority. |
| 1617 """ |
| 1618 hostname, port = urllib.splitport(authority) |
| 1619 proxy_info = self.proxy_info |
| 1620 if callable(proxy_info): |
| 1621 proxy_info = proxy_info(scheme) |
| 1622 |
| 1623 if (hasattr(proxy_info, 'applies_to') |
| 1624 and not proxy_info.applies_to(hostname)): |
| 1625 proxy_info = None |
| 1626 return proxy_info |
| 1627 |
| 1628 |
| 1629 class Response(dict): |
| 1630 """An object more like email.Message than httplib.HTTPResponse.""" |
| 1631 |
| 1632 """Is this response from our local cache""" |
| 1633 fromcache = False |
| 1634 |
| 1635 """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. "
"" |
| 1636 version = 11 |
| 1637 |
| 1638 "Status code returned by server. " |
| 1639 status = 200 |
| 1640 |
| 1641 """Reason phrase returned by server.""" |
| 1642 reason = "Ok" |
| 1643 |
| 1644 previous = None |
| 1645 |
| 1646 def __init__(self, info): |
| 1647 # info is either an email.Message or |
| 1648 # an httplib.HTTPResponse object. |
| 1649 if isinstance(info, httplib.HTTPResponse): |
| 1650 for key, value in info.getheaders(): |
| 1651 self[key.lower()] = value |
| 1652 self.status = info.status |
| 1653 self['status'] = str(self.status) |
| 1654 self.reason = info.reason |
| 1655 self.version = info.version |
| 1656 elif isinstance(info, email.Message.Message): |
| 1657 for key, value in info.items(): |
| 1658 self[key.lower()] = value |
| 1659 self.status = int(self['status']) |
| 1660 else: |
| 1661 for key, value in info.iteritems(): |
| 1662 self[key.lower()] = value |
| 1663 self.status = int(self.get('status', self.status)) |
| 1664 self.reason = self.get('reason', self.reason) |
| 1665 |
| 1666 |
| 1667 def __getattr__(self, name): |
| 1668 if name == 'dict': |
| 1669 return self |
| 1670 else: |
| 1671 raise AttributeError, name |
OLD | NEW |