Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(92)

Side by Side Diff: reviewbot/third_party/google-api-python-client/httplib2/__init__.py

Issue 20515002: Add google-api-python-client in third_party/ (Closed) Base URL: https://src.chromium.org/chrome/trunk/tools/
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 from __future__ import generators
2 """
3 httplib2
4
5 A caching http interface that supports ETags and gzip
6 to conserve bandwidth.
7
8 Requires Python 2.3 or later
9
10 Changelog:
11 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
12
13 """
14
15 __author__ = "Joe Gregorio (joe@bitworking.org)"
16 __copyright__ = "Copyright 2006, Joe Gregorio"
17 __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
18 "James Antill",
19 "Xavier Verges Farrero",
20 "Jonathan Feinberg",
21 "Blair Zajac",
22 "Sam Ruby",
23 "Louis Nyffenegger"]
24 __license__ = "MIT"
25 __version__ = "0.8"
26
27 import re
28 import sys
29 import email
30 import email.Utils
31 import email.Message
32 import email.FeedParser
33 import StringIO
34 import gzip
35 import zlib
36 import httplib
37 import urlparse
38 import urllib
39 import base64
40 import os
41 import copy
42 import calendar
43 import time
44 import random
45 import errno
46 try:
47 from hashlib import sha1 as _sha, md5 as _md5
48 except ImportError:
49 # prior to Python 2.5, these were separate modules
50 import sha
51 import md5
52 _sha = sha.new
53 _md5 = md5.new
54 import hmac
55 from gettext import gettext as _
56 import socket
57
# Prefer the socks module bundled alongside httplib2; fall back to a
# system-wide socks module; run without proxy support when neither imports.
try:
    from httplib2 import socks
except ImportError:
    try:
        import socks
    except (ImportError, AttributeError):
        # socks is None is checked later before any proxy use.
        socks = None
65
# Build the appropriate socket wrapper for ssl
try:
    import ssl # python 2.6
    ssl_SSLError = ssl.SSLError
    def _ssl_wrap_socket(sock, key_file, cert_file,
                         disable_validation, ca_certs):
        # Map the boolean flag onto the ssl module's certificate-requirement
        # constants; CERT_REQUIRED makes wrap_socket validate against ca_certs.
        if disable_validation:
            cert_reqs = ssl.CERT_NONE
        else:
            cert_reqs = ssl.CERT_REQUIRED
        # We should be specifying SSL version 3 or TLS v1, but the ssl module
        # doesn't expose the necessary knobs. So we need to go with the default
        # of SSLv23.
        return ssl.wrap_socket(sock, keyfile=key_file, certfile=cert_file,
                               cert_reqs=cert_reqs, ca_certs=ca_certs)
except (AttributeError, ImportError):
    # Pre-2.6 fallback: socket.ssl can encrypt the connection but offers no
    # certificate validation at all, hence the explicit refusal below.
    ssl_SSLError = None
    def _ssl_wrap_socket(sock, key_file, cert_file,
                         disable_validation, ca_certs):
        if not disable_validation:
            raise CertificateValidationUnsupported(
                    "SSL certificate validation is not supported without "
                    "the ssl module installed. To avoid this error, install "
                    "the ssl module, or explicity disable validation.")
        ssl_sock = socket.ssl(sock, key_file, cert_file)
        return httplib.FakeSocket(sock, ssl_sock)
92
93
# iri2uri (IRI -> URI conversion) needs Python 2.3+; on older Pythons fall
# back to a pass-through since they cannot handle IRIs anyway.
if sys.version_info >= (2,3):
    from iri2uri import iri2uri
else:
    def iri2uri(uri):
        """Identity fallback: return the URI unchanged."""
        return uri
99
def has_timeout(timeout): # python 2.6
    """True when *timeout* is an actual timeout value.

    None -- and, where the socket module defines it, the
    _GLOBAL_DEFAULT_TIMEOUT sentinel -- both mean "no timeout set".
    """
    sentinel = getattr(socket, '_GLOBAL_DEFAULT_TIMEOUT', None)
    if sentinel is not None:
        return timeout is not None and timeout is not sentinel
    return timeout is not None
104
# Public API of the module (Http/Response are defined later in the file).
__all__ = [
    'Http', 'Response', 'ProxyInfo', 'HttpLib2Error', 'RedirectMissingLocation',
    'RedirectLimit', 'FailedToDecompressContent',
    'UnimplementedDigestAuthOptionError',
    'UnimplementedHmacDigestAuthOptionError',
    'debuglevel', 'ProxiesUnavailableError']


# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0

# A request will be tried 'RETRIES' times if it fails at the socket/connection level.
RETRIES = 2
118
# Python 2.3 support: 2.3 lacks the sorted() builtin.
# NOTE: unlike the builtin, this shim sorts seq in place and returns it.
if sys.version_info < (2,4):
    def sorted(seq):
        seq.sort()
        return seq

# Python 2.3 support
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples."""
    if self.msg is None:
        raise httplib.ResponseNotReady()
    return self.msg.items()

# Older httplib.HTTPResponse classes lack getheaders(); patch it in once.
if not hasattr(httplib.HTTPResponse, 'getheaders'):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
134
# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception): pass

# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    def __init__(self, desc, response, content):
        # Keep the response/content so callers may recover the payload.
        self.response = response
        self.content = content
        HttpLib2Error.__init__(self, desc)

class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
class RedirectLimit(HttpLib2ErrorWithResponse): pass
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass

# Errors that carry no response payload.
class MalformedHeader(HttpLib2Error): pass
class RelativeURIError(HttpLib2Error): pass
class ServerNotFoundError(HttpLib2Error): pass
class ProxiesUnavailableError(HttpLib2Error): pass
class CertificateValidationUnsupported(HttpLib2Error): pass
class SSLHandshakeError(HttpLib2Error): pass
class NotSupportedOnThisPlatform(HttpLib2Error): pass
class CertificateHostnameMismatch(SSLHandshakeError):
    # Records which host/cert pair failed hostname validation.
    def __init__(self, desc, host, cert):
        HttpLib2Error.__init__(self, desc)
        self.host = host
        self.cert = cert
164
# Open Items:
# -----------
# Proxy support

# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)

# Pluggable cache storage (supports storing the cache in
# flat files by default. We need a plug-in architecture
# that can support Berkeley DB and Squid)

# == Known Issues ==
# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
# Does not handle Cache-Control: max-stale
# Does not use Age: headers when calculating cache freshness.


# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
DEFAULT_MAX_REDIRECTS = 5

try:
    # Users can optionally provide a module that tells us where the CA_CERTS
    # are located.
    import ca_certs_locater
    CA_CERTS = ca_certs_locater.get()
except ImportError:
    # Default CA certificates file bundled with httplib2.
    CA_CERTS = os.path.join(
        os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt")
196
197 # Which headers are hop-by-hop headers by default
198 HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authoriza tion', 'te', 'trailers', 'transfer-encoding', 'upgrade']
199
200 def _get_end2end_headers(response):
201 hopbyhop = list(HOP_BY_HOP)
202 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',' )])
203 return [header for header in response.keys() if header not in hopbyhop]
204
# Splitting regex for URI references, straight from RFC 3986 Appendix B.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)

    Missing components come back as None.
    """
    g = URI.match(uri).groups()
    return g[1], g[3], g[4], g[6], g[8]
214
def urlnorm(uri):
    """Normalize *uri*; return (scheme, authority, request_uri, defrag_uri).

    Lower-cases the scheme and authority, defaults an empty path to "/",
    and builds a fragment-free URI for cache keying.

    Raises RelativeURIError for relative URIs.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    authority = authority.lower()
    # Fix: the original lower-cased the scheme twice; once is enough.
    scheme = scheme.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
229
230
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r'^\w+://')
re_slash = re.compile(r'[?/:|]+')

def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """

    try:
        if re_url_scheme.match(filename):
            if isinstance(filename,str):
                # Byte-string URLs are decoded first so the name can be
                # IDNA-encoded (punycode) for a filesystem-safe ASCII form.
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except UnicodeError:
        # Name is not IDNA-encodable; fall through and use it as-is.
        pass
    if isinstance(filename,unicode):
        filename=filename.encode('utf-8')
    filemd5 = _md5(filename).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

    # limit length of filename
    if len(filename)>200:
        filename=filename[:200]
    # The md5 of the full name is appended so truncation/substitution above
    # cannot make two different URLs collide.
    return ",".join((filename, filemd5))
261
262 NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
263 def _normalize_headers(headers):
264 return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (k ey, value) in headers.iteritems()])
265
266 def _parse_cache_control(headers):
267 retval = {}
268 if headers.has_key('cache-control'):
269 parts = headers['cache-control'].split(',')
270 parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)] ) for part in parts if -1 != part.find("=")]
271 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == na me.find("=")]
272 retval = dict(parts_with_args + parts_wo_args)
273 return retval
274
275 # Whether to use a strict mode to parse WWW-Authenticate headers
276 # Might lead to bad results in case of ill-formed header value,
277 # so disabled by default, falling back to relaxed parsing.
278 # Set to true to turn on, usefull for testing servers.
279 USE_WWW_AUTH_STRICT_PARSING = 0
280
281 # In regex below:
282 # [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" a s defined by HTTP
283 # "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-s tring" as defined by HTTP, when LWS have already been replaced by a single space
284 # Actually, as an auth-param value can be either a token or a quoted-string, the y are combined in a single pattern which matches both:
285 # \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08 \x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
286 WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[ \]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*? (?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
287 WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\") (?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
288 UNQUOTE_PAIRS = re.compile(r'\\(.)')
289 def _parse_www_authenticate(headers, headername='www-authenticate'):
290 """Returns a dictionary of dictionaries, one dict
291 per auth_scheme."""
292 retval = {}
293 if headers.has_key(headername):
294 try:
295
296 authenticate = headers[headername].strip()
297 www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AU TH_RELAXED
298 while authenticate:
299 # Break off the scheme at the beginning of the line
300 if headername == 'authentication-info':
301 (auth_scheme, the_rest) = ('digest', authenticate)
302 else:
303 (auth_scheme, the_rest) = authenticate.split(" ", 1)
304 # Now loop over all the key value pairs that come after the sche me,
305 # being careful not to roll into the next scheme
306 match = www_auth.search(the_rest)
307 auth_params = {}
308 while match:
309 if match and len(match.groups()) == 3:
310 (key, value, the_rest) = match.groups()
311 auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', valu e) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
312 match = www_auth.search(the_rest)
313 retval[auth_scheme.lower()] = auth_params
314 authenticate = the_rest.strip()
315
316 except ValueError:
317 raise MalformedHeader("WWW-Authenticate")
318 return retval
319
320
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Not that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Returns one of the strings "FRESH", "STALE" or "TRANSPARENT".
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    # A client Pragma: no-cache forces a pass-through request; it is also
    # mirrored into Cache-Control on the request for consistency.
    if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif cc.has_key('no-cache'):
        retval = "TRANSPARENT"
    elif cc_response.has_key('no-cache'):
        retval = "STALE"
    elif cc.has_key('only-if-cached'):
        retval = "FRESH"
    elif response_headers.has_key('date'):
        # Freshness lifetime: response max-age, else Expires - Date, else 0;
        # a max-age on the *request* overrides whatever the response said.
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if cc.has_key('min-fresh'):
            # min-fresh inflates the computed age so that entries close to
            # expiry are reported STALE.
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
394
def _decompressContent(response, new_content):
    """Undo a gzip or deflate Content-Encoding on *new_content*.

    On success, updates content-length, renames content-encoding to
    -content-encoding (preserving the history without lying about the
    decoded body), and returns the decoded bytes.  Raises
    FailedToDecompressContent when decoding fails.
    """
    content = new_content
    try:
        encoding = response.get('content-encoding', None)
        if encoding in ['gzip', 'deflate']:
            if encoding == 'gzip':
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == 'deflate':
                content = zlib.decompress(content)
            response['content-length'] = str(len(content))
            # Record the historical presence of the encoding in a way that won't interfere.
            response['-content-encoding'] = response['content-encoding']
            del response['content-encoding']
    except IOError:
        content = ""
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
    return content
412
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or evict) a response in *cache* under *cachekey*.

    Honors no-store on either side by deleting the entry; otherwise
    serializes "status line + end-to-end headers + body", adding
    -varied-* annotations that record which request header values the
    response Varies on.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if cc.has_key('no-store') or cc_response.has_key('no-store'):
            cache.delete(cachekey)
        else:
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                if key not in ['status','content-encoding','transfer-encoding']:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get('vary', None)
            if vary:
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            # A 304 means the cached body is still good: store as 200 so
            # replaying the entry produces a usable response.
            status = response_headers.status
            if status == 304:
                status = 200

            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Normalize any bare CR or LF to CRLF so the blob reparses cleanly.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
449
def _cnonce():
    # Derive a 16-hex-digit client nonce from the current time plus 20
    # pseudo-random digits.  NOTE(review): randrange(0, 9) never yields 9,
    # so the digit '9' is never drawn, and it is the list's repr() (not a
    # joined string) that gets hashed -- harmless for a nonce, but odd.
    dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
    return dig[:16]
453
def _wsse_username_token(cnonce, iso_now, password):
    """Compute the base64-encoded SHA-1 PasswordDigest used by WSSE auth."""
    token = "%s%s%s" % (cnonce, iso_now, password)
    return base64.b64encode(_sha(token).digest()).strip()
456
457
458 # For credentials we need two things, first
459 # a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
460 # Then we also need a list of URIs that have already demanded authentication
461 # That list is tricky since sub-URIs can take the same auth, or the
462 # auth scheme may change as you descend the tree.
463 # So we also need each Auth instance to be able to tell us
464 # how close to the 'top' it is.
465
class Authentication(object):
    """One authentication scope: credentials bound to a host + path prefix.

    Subclasses override request()/response() to inject the scheme-specific
    Authorization header and to react to server challenges.
    """

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        scheme, authority, path, query, fragment = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        """How many path segments *request_uri* sits below our root path."""
        scheme, authority, path, query, fragment = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        """True when *host*/*request_uri* fall inside this auth scope."""
        # XXX Should we normalize the request_uri?
        scheme, authority, path, query, fragment = parse_uri(request_uri)
        return host == self.host and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Add the appropriate Authorization header.

        Base implementation does nothing; subclasses override."""
        pass

    def response(self, response, content):
        """Hook for updating state (e.g. nonces) from an authorized response.

        Returns True when the request should be retried (for example,
        Digest may return stale=true); this base class never retries.
        """
        return False
497
498
499
class BasicAuthentication(Authentication):
    """RFC 2617 Basic access authentication."""

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Add a Basic Authorization header built from our credentials."""
        userpass = "%s:%s" % self.credentials
        headers['authorization'] = 'Basic ' + base64.b64encode(userpass).strip()
508
509
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        # Only qop="auth" is implemented; anything else is rejected.
        qop = self.challenge.get('qop', 'auth')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 = username ":" realm ":" password (RFC 2617, section 3.2.2.2).
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers"""
        # H and KD exactly as defined by RFC 2617; the digest below is the
        # qop=auth form: KD(H(A1), nonce:nc:cnonce:qop:H(A2)).
        H = lambda x: _md5(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (
            self.challenge['nonce'],
            '%08x' % self.challenge['nc'],
            self.challenge['cnonce'],
            self.challenge['qop'], H(A2)))
        headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri,
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'])
        if self.challenge.get('opaque'):
            headers['authorization'] += ', opaque="%s"' % self.challenge['opaque']
        # The nonce count increments for every request made with this nonce.
        self.challenge['nc'] += 1

    def response(self, response, content):
        if not response.has_key('authentication-info'):
            # No Authentication-Info header: a stale=true challenge means the
            # request should be retried with the server's fresh nonce.
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            # Server may rotate the nonce via nextnonce; adopt it if present.
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
566
567
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # NOTE(review): .new() only exists on the pre-2.5 sha/md5 modules;
        # with the hashlib-backed _sha/_md5 bound at import time these calls
        # would fail -- confirm which Python versions exercise this path.
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']])
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        # The digest covers method, URI, a fresh cnonce, the server nonce and
        # the concatenated values of every end-to-end header being sent.
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist)

    def response(self, response, content):
        # A reported integrity failure or stale nonce means retry the request.
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
626
627
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'WSSE profile="UsernameToken"'
        # Build the UsernameToken: a fresh nonce + creation timestamp are
        # folded into the password digest (see _wsse_username_token).
        timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        nonce = _cnonce()
        digest = _wsse_username_token(nonce, timestamp, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
                self.credentials[0],
                digest,
                nonce,
                timestamp)
651
class GoogleLoginAuthentication(Authentication):
    """ClientLogin authentication for legacy Google data APIs.

    NOTE: performs a network round-trip inside __init__ to exchange the
    credentials for an Auth token.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login rejected: leave the token empty rather than raising.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
679
680
# Maps the lower-cased scheme name from a WWW-Authenticate challenge to the
# Authentication subclass implementing it.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Order in which schemes are tried when a server offers several
# (presumably strongest-first; consumed elsewhere in this file).
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
690
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        """*cache* is the directory to store entries in (created if missing);
        *safe* maps a cache key to a filesystem-safe filename."""
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the cached value for *key*, or None if absent/unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            # open() instead of the deprecated file() builtin (file() was
            # removed in Python 3); behavior is identical on Python 2.
            f = open(cacheFullPath, "rb")
            retval = f.read()
            f.close()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        """Store *value* under *key*, overwriting any previous entry."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = open(cacheFullPath, "wb")
        f.write(value)
        f.close()

    def delete(self, key):
        """Remove *key*'s entry if present; silently no-op otherwise."""
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
723
class Credentials(object):
    """A pool of (domain, name, password) credential triples.

    An empty domain acts as a wildcard that matches every host.
    """

    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        """Register *name*/*password* for *domain* ("" = any domain)."""
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        """Forget every stored credential."""
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to *domain*."""
        for stored_domain, name, password in self.credentials:
            if stored_domain in ("", domain):
                yield (name, password)
738
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
    pass

class AllHosts(object):
    """Sentinel: setting ProxyInfo.bypass_hosts = AllHosts makes every host
    bypass the proxy (see proxy_info_from_environment, no_proxy='*')."""
    pass
746
class ProxyInfo(object):
    """Collect information required to use a proxy."""
    # Hostnames (suffix-matched) excluded from proxying; may also be the
    # AllHosts sentinel class to bypass the proxy for every host.
    bypass_hosts = ()

    def __init__(self, proxy_type, proxy_host, proxy_port,
                 proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

        p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
            proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return all settings as (type, host, port, rdns, user, pass)."""
        return (self.proxy_type, self.proxy_host, self.proxy_port,
                self.proxy_rdns, self.proxy_user, self.proxy_pass)

    def isgood(self):
        """True when both a proxy host and port are configured."""
        # Idiom fix: 'is not None' instead of '!= None' -- identity is the
        # correct comparison for None and sidesteps custom __eq__ methods.
        return (self.proxy_host is not None) and (self.proxy_port is not None)

    def applies_to(self, hostname):
        """True unless *hostname* is excluded by bypass_hosts."""
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        bypass = False
        for domain in self.bypass_hosts:
            if hostname.endswith(domain):
                bypass = True

        return bypass
787
788
def proxy_info_from_environment(method='http'):
    """
    Read proxy info from the environment variables.

    Returns a ProxyInfo (with bypass_hosts filled from no_proxy/NO_PROXY),
    or None when *method* is unsupported or no proxy variable is set.
    """
    if method not in ('http', 'https'):
        return

    env_var = method + '_proxy'
    url = os.environ.get(env_var, os.environ.get(env_var.upper()))
    if not url:
        return
    proxy = proxy_info_from_url(url, method)

    no_proxy = os.environ.get('no_proxy', os.environ.get('NO_PROXY', ''))
    if no_proxy == '*':
        # special case, no_proxy=* means all hosts bypassed
        proxy.bypass_hosts = AllHosts
    elif no_proxy:
        proxy.bypass_hosts = no_proxy.split(',')
    else:
        proxy.bypass_hosts = []
    return proxy
812
def proxy_info_from_url(url, method='http'):
    """
    Construct a ProxyInfo from a URL (such as http_proxy env var)

    Supports optional user:password@ credentials and an explicit port;
    the port defaults to 443 for https and 80 for http.
    """
    parsed = urlparse.urlparse(url)
    username = None
    password = None
    port = None

    netloc = parsed[1]
    if '@' in netloc:
        ident, host_port = netloc.split('@', 1)
        if ':' in ident:
            username, password = ident.split(':', 1)
        else:
            # Bare ident (no colon) is treated as a password only.
            password = ident
    else:
        host_port = netloc

    if ':' in host_port:
        host, port = host_port.split(':', 1)
    else:
        host = host_port

    port = int(port) if port else {'https': 443, 'http': 80}[method]

    return ProxyInfo(
        proxy_type=3,  # socks.PROXY_TYPE_HTTP
        proxy_host=host,
        proxy_port=port,
        proxy_user=username or None,
        proxy_pass=password or None,
    )
847
848
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """
    HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        httplib.HTTPConnection.__init__(self, host, port, strict)
        self.timeout = timeout
        # proxy_info is expected to be a ProxyInfo instance, or None.
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Mostly verbatim from httplib.py.
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError(
                'Proxy support missing but proxy use was requested!')
        # Default error used when getaddrinfo() yields no usable result;
        # overwritten by the last socket.error caught in the loop below.
        msg = "getaddrinfo returns an empty list"
        if self.proxy_info and self.proxy_info.isgood():
            use_proxy = True
            proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple()
        else:
            use_proxy = False
        # With remote DNS (proxy_rdns) the proxy resolves the target name,
        # so name resolution below is done on the proxy address instead.
        if use_proxy and proxy_rdns:
            host = proxy_host
            port = proxy_port
        else:
            host = self.host
            port = self.port

        # Try each address returned by getaddrinfo() until one connects.
        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                # Different from httplib: support timeouts.
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                # End of difference from httplib.
                if self.debuglevel > 0:
                    print "connect: (%s, %s) ************" % (self.host, self.port)
                    if use_proxy:
                        print "proxy: %s ************" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))

                self.sock.connect((self.host, self.port) + sa[2:])
            except socket.error, msg:
                if self.debuglevel > 0:
                    print "connect fail: (%s, %s)" % (self.host, self.port)
                    if use_proxy:
                        print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # Every candidate address failed; re-raise the last error caught
            # (msg still holds it after the loop -- Python 2 scoping).
            raise socket.error, msg
914
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    """
    This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """
    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None,
                 ca_certs=None, disable_ssl_certificate_validation=False):
        httplib.HTTPSConnection.__init__(self, host, port=port,
                                         key_file=key_file,
                                         cert_file=cert_file, strict=strict)
        self.timeout = timeout
        self.proxy_info = proxy_info
        # Fall back to the CA bundle shipped with httplib2 when the caller
        # does not supply one.
        if ca_certs is None:
            ca_certs = CA_CERTS
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = \
                disable_ssl_certificate_validation

    # The following two methods were adapted from https_wrapper.py, released
    # with the Google Appengine SDK at
    # http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py
    # under the following license:
    #
    # Copyright 2007 Google Inc.
    #
    # Licensed under the Apache License, Version 2.0 (the "License");
    # you may not use this file except in compliance with the License.
    # You may obtain a copy of the License at
    #
    #     http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    #

    def _GetValidHostsForCert(self, cert):
        """Returns a list of valid host globs for an SSL certificate.

        Args:
          cert: A dictionary representing an SSL certificate.
        Returns:
          list: A list of valid host globs.
        """
        if 'subjectAltName' in cert:
            # subjectAltName DNS entries take precedence over commonName.
            return [x[1] for x in cert['subjectAltName']
                    if x[0].lower() == 'dns']
        else:
            return [x[0][1] for x in cert['subject']
                    if x[0][0].lower() == 'commonname']

    def _ValidateCertificateHostname(self, cert, hostname):
        """Validates that a given hostname is valid for an SSL certificate.

        Args:
          cert: A dictionary representing an SSL certificate.
          hostname: The hostname to test.
        Returns:
          bool: Whether or not the hostname is valid for this certificate.
        """
        hosts = self._GetValidHostsForCert(cert)
        for host in hosts:
            # Turn the certificate glob into a regex; '*' matches a single
            # DNS label (no dots), per usual wildcard-cert semantics.
            host_re = host.replace('.', '\.').replace('*', '[^.]*')
            if re.search('^%s$' % (host_re,), hostname, re.I):
                return True
        return False

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # Default error used when getaddrinfo() yields no usable result;
        # overwritten by the last socket.error caught in the loop below.
        msg = "getaddrinfo returns an empty list"
        if self.proxy_info and self.proxy_info.isgood():
            use_proxy = True
            proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple()
        else:
            use_proxy = False
        # With remote DNS (proxy_rdns) the proxy resolves the target name,
        # so name resolution below is done on the proxy address instead.
        if use_proxy and proxy_rdns:
            host = proxy_host
            port = proxy_port
        else:
            host = self.host
            port = self.port

        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)
                sock.connect((self.host, self.port))
                self.sock = _ssl_wrap_socket(
                    sock, self.key_file, self.cert_file,
                    self.disable_ssl_certificate_validation, self.ca_certs)
                if self.debuglevel > 0:
                    print "connect: (%s, %s)" % (self.host, self.port)
                    if use_proxy:
                        print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
                if not self.disable_ssl_certificate_validation:
                    cert = self.sock.getpeercert()
                    # NOTE(review): maxsplit=0 makes this split a no-op, so
                    # hostname == self.host; presumably httplib has already
                    # stripped any :port from self.host -- confirm.
                    hostname = self.host.split(':', 0)[0]
                    if not self._ValidateCertificateHostname(cert, hostname):
                        raise CertificateHostnameMismatch(
                            'Server presented certificate that does not match '
                            'host %s: %s' % (hostname, cert), hostname, cert)
            except ssl_SSLError, e:
                if sock:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                # Unfortunately the ssl module doesn't seem to provide any way
                # to get at more detailed error information, in particular
                # whether the error is due to certificate validation or
                # something else (such as SSL protocol mismatch).
                if e.errno == ssl.SSL_ERROR_SSL:
                    raise SSLHandshakeError(e)
                else:
                    raise
            except (socket.timeout, socket.gaierror):
                raise
            except socket.error, msg:
                if self.debuglevel > 0:
                    print "connect fail: (%s, %s)" % (self.host, self.port)
                    if use_proxy:
                        print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            # Every candidate address failed; re-raise the last error caught.
            raise socket.error, msg
1061
# Maps a URI scheme to the connection class used for it; rebound to the
# App Engine connection classes below when running under App Engine.
SCHEME_TO_CONNECTION = {
    'http': HTTPConnectionWithTimeout,
    'https': HTTPSConnectionWithTimeout
}
1066
# Use a different connection object for Google App Engine
try:
    try:
        from google.appengine.api import apiproxy_stub_map
        if apiproxy_stub_map.apiproxy.GetStub('urlfetch') is None:
            raise ImportError  # Bail out; we're not actually running on App Engine.
        from google.appengine.api.urlfetch import fetch
        from google.appengine.api.urlfetch import InvalidURLError
    except (ImportError, AttributeError):
        # Second chance: the internal (google3) flavor of the SDK.
        from google3.apphosting.api import apiproxy_stub_map
        if apiproxy_stub_map.apiproxy.GetStub('urlfetch') is None:
            raise ImportError  # Bail out; we're not actually running on App Engine.
        from google3.apphosting.api.urlfetch import fetch
        from google3.apphosting.api.urlfetch import InvalidURLError

    def _new_fixed_fetch(validate_certificate):
        # Returns a wrapper around urlfetch.fetch() with the
        # validate_certificate flag baked in.
        def fixed_fetch(url, payload=None, method="GET", headers={},
                        allow_truncated=False, follow_redirects=True,
                        deadline=5):
            return fetch(url, payload=payload, method=method, headers=headers,
                         allow_truncated=allow_truncated,
                         follow_redirects=follow_redirects, deadline=deadline,
                         validate_certificate=validate_certificate)
        return fixed_fetch

    class AppEngineHttpConnection(httplib.HTTPConnection):
        """Use httplib on App Engine, but compensate for its weirdness.

        The parameters key_file, cert_file, proxy_info, ca_certs, and
        disable_ssl_certificate_validation are all dropped on the ground.
        """
        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None, proxy_info=None, ca_certs=None,
                     disable_ssl_certificate_validation=False):
            httplib.HTTPConnection.__init__(self, host, port=port,
                                            strict=strict, timeout=timeout)

    class AppEngineHttpsConnection(httplib.HTTPSConnection):
        """Same as AppEngineHttpConnection, but for HTTPS URIs."""
        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=None, proxy_info=None, ca_certs=None,
                     disable_ssl_certificate_validation=False):
            httplib.HTTPSConnection.__init__(self, host, port=port,
                                             key_file=key_file,
                                             cert_file=cert_file, strict=strict,
                                             timeout=timeout)
            self._fetch = _new_fixed_fetch(
                    not disable_ssl_certificate_validation)

    # Update the connection classes to use the Google App Engine specific ones.
    SCHEME_TO_CONNECTION = {
        'http': AppEngineHttpConnection,
        'https': AppEngineHttpsConnection
    }
except (ImportError, AttributeError):
    pass
1123
1124
class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression,
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """
    def __init__(self, cache=None, timeout=None,
                 proxy_info=proxy_info_from_environment,
                 ca_certs=None, disable_ssl_certificate_validation=False):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
          - a callable that takes the http scheme ('http' or 'https') and
            returns a ProxyInfo instance per request. By default, uses
            proxy_info_from_environment.
          - a ProxyInfo instance (static proxy config).
          - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation.  By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = \
                disable_ssl_certificate_validation

        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, basestring):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        self.ignore_etag = False

        self.force_exception_to_status_code = False

        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False

    def __getstate__(self):
        # Support pickling by dropping transient, unpicklable state.
        state_dict = copy.copy(self.__dict__)
        # In case request is augmented by some foreign object such as
        # credentials which handle auth
        if 'request' in state_dict:
            del state_dict['request']
        if 'connections' in state_dict:
            del state_dict['connections']
        return state_dict

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Live connections are never pickled; start with an empty pool.
        self.connections = {}

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests.
        """
        challenges = _parse_www_authenticate(response, 'www-authenticate')
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if challenges.has_key(scheme):
                    yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []

    def _conn_request(self, conn, request_uri, method, body, headers):
        # Issue one request on an already-configured connection, retrying
        # up to RETRIES times on transient connection failures.
        for i in range(RETRIES):
            try:
                if hasattr(conn, 'sock') and conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except ssl_SSLError:
                conn.close()
                raise
            except socket.error, e:
                err = 0
                if hasattr(e, 'args'):
                    err = getattr(e, 'args')[0]
                else:
                    err = e.errno
                if err == errno.ECONNREFUSED: # Connection refused
                    raise
            except httplib.HTTPException:
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                if hasattr(conn, 'sock') and conn.sock is None:
                    if i < RETRIES-1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES-1:
                    conn.close()
                    conn.connect()
                    continue
            try:
                response = conn.getresponse()
            except (socket.error, httplib.HTTPException):
                if i < RETRIES-1:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            else:
                content = ""
                if method == "HEAD":
                    conn.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)
            break
        return (response, content)


    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        # Pick the most specific (deepest-path) in-scope authorization.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            # Retry once if the auth object says the response demands it
            # (e.g. a stale digest nonce).
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers )
                response._stale_digest = 1

        if response.status == 401:
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urlparse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        response['-x-permanent-redirect-url'] = response['location']
                        if not response.has_key('content-location'):
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    if headers.has_key('if-none-match'):
                        del headers['if-none-match']
                    if headers.has_key('if-modified-since'):
                        del headers['if-modified-since']
                    if 'authorization' in headers and not self.forward_authorization_headers:
                        del headers['authorization']
                    if response.has_key('location'):
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if not old_response.has_key('content-location'):
                            old_response['content-location'] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            # 302/303 redirects are re-issued as GET.
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        response.previous = old_response
                else:
                    raise RedirectLimit("Redirected more times than rediection_limit allows.", response, content)
            elif response.status in [200, 203] and method in ["GET", "HEAD"]:
                # Don't cache 206's since we aren't going to handle byte range requests
                if not response.has_key('content-location'):
                    response['content-location'] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        # Thin wrapper over the module-level _normalize_headers; gives
        # subclasses a hook to override header normalization.
        return _normalize_headers(headers)

    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.


    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """ Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin with either
        'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST, DELETE,
        etc. There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is a
        string object.

        Any extra headers that are to be sent with the request should be
        provided in the 'headers' dictionary.

        The maximum number of redirect to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.
        """
        try:
            if headers is None:
                headers = {}
            else:
                headers = self._normalize_headers(headers)

            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version__

            uri = iri2uri(uri)

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
            # http on port 443 is treated as https.
            domain_port = authority.split(":")[0:2]
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                scheme = 'https'
                authority = domain_port[0]

            proxy_info = self._get_proxy_info(scheme, authority)

            # Reuse a pooled connection per scheme+authority when possible.
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
            else:
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if scheme == 'https':
                    if certs:
                        conn = self.connections[conn_key] = connection_type(
                                authority, key_file=certs[0][0],
                                cert_file=certs[0][1], timeout=self.timeout,
                                proxy_info=proxy_info,
                                ca_certs=self.ca_certs,
                                disable_ssl_certificate_validation=
                                        self.disable_ssl_certificate_validation)
                    else:
                        conn = self.connections[conn_key] = connection_type(
                                authority, timeout=self.timeout,
                                proxy_info=proxy_info,
                                ca_certs=self.ca_certs,
                                disable_ssl_certificate_validation=
                                        self.disable_ssl_certificate_validation)
                else:
                    conn = self.connections[conn_key] = connection_type(
                            authority, timeout=self.timeout,
                            proxy_info=proxy_info)
                conn.set_debuglevel(debuglevel)

            if 'range' not in headers and 'accept-encoding' not in headers:
                headers['accept-encoding'] = 'gzip, deflate'

            info = email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    # info = email.message_from_string(cached_value)
                    #
                    # Need to replace the line above with the kludge below
                    # to fix the non-existent bug not fixed in this
                    # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
                    try:
                        info, content = cached_value.split('\r\n\r\n', 1)
                        feedparser = email.FeedParser.FeedParser()
                        feedparser.feed(info)
                        info = feedparser.close()
                        feedparser._parse = None
                    except (IndexError, ValueError):
                        # Corrupt cache entry: drop it and carry on uncached.
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else:
                cachekey = None

            if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in ['GET', 'HEAD'] and 'vary' in info:
                vary = info['vary']
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit("Redirected more times than rediection_limit allows.", {}, "")
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if cc.has_key('only-if-cached'):
                    info['status'] = '504'
                    response = Response(info)
                    content = ""
                else:
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                # Convert the exception into a synthetic Response instead of
                # propagating it to the caller.
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response({
                        "content-type": "text/plain",
                        "status": "408",
                        "content-length": len(content)
                    })
                    response.reason = "Request Timeout"
                else:
                    content = str(e)
                    response = Response({
                        "content-type": "text/plain",
                        "status": "400",
                        "content-length": len(content)
                    })
                    response.reason = "Bad Request"
            else:
                raise


        return (response, content)

    def _get_proxy_info(self, scheme, authority):
        """Return a ProxyInfo instance (or None) based on the scheme
        and authority.
        """
        hostname, port = urllib.splitport(authority)
        proxy_info = self.proxy_info
        if callable(proxy_info):
            proxy_info = proxy_info(scheme)

        if (hasattr(proxy_info, 'applies_to')
            and not proxy_info.applies_to(hostname)):
            proxy_info = None
        return proxy_info
1613
1614
class Response(dict):
    """An object more like email.Message than httplib.HTTPResponse."""

    """Is this response from our local cache"""
    fromcache = False

    """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
    version = 11

    "Status code returned by server. "
    status = 200

    """Reason phrase returned by server."""
    reason = "Ok"

    previous = None

    def __init__(self, info):
        # info is either an email.Message or
        # an httplib.HTTPResponse object.
        if isinstance(info, httplib.HTTPResponse):
            for key, value in info.getheaders():
                self[key.lower()] = value
            self.status = info.status
            self['status'] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.Message.Message):
            for key, value in info.items():
                self[key.lower()] = value
            self.status = int(self['status'])
        else:
            # Otherwise treat info as a plain mapping of header -> value.
            for key, value in info.iteritems():
                self[key.lower()] = value
            self.status = int(self.get('status', self.status))
            self.reason = self.get('reason', self.reason)


    def __getattr__(self, name):
        # 'response.dict' returns the response itself, for compatibility
        # with callers expecting a dict attribute.
        if name == 'dict':
            return self
        else:
            raise AttributeError, name
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698