Chromium Code Reviews

Side by Side Diff: third_party/google-endpoints/future/backports/urllib/request.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)
Patch Set: Created 3 years, 10 months ago
1 """
2 Ported using Python-Future from the Python 3.3 standard library.
3
4 An extensible library for opening URLs using a variety of protocols
5
6 The simplest way to use this module is to call the urlopen function,
7 which accepts a string containing a URL or a Request object (described
8 below). It opens the URL and returns the results as a file-like
9 object; the returned object has some extra methods described below.
10
11 The OpenerDirector manages a collection of Handler objects that do
12 all the actual work. Each Handler implements a particular protocol or
13 option. The OpenerDirector is a composite object that invokes the
14 Handlers needed to open the requested URL. For example, the
15 HTTPHandler performs HTTP GET and POST requests and deals with
16 non-error returns. The HTTPRedirectHandler automatically deals with
17 HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
18 deals with digest authentication.
19
20 urlopen(url, data=None) -- Basic usage is the same as original
21 urllib. Pass the url and optionally data to post to an HTTP URL, and
22 get a file-like object back. One difference is that you can also pass
23 a Request instance instead of a URL. Raises a URLError (subclass of
24 IOError); for HTTP errors, raises an HTTPError, which can also be
25 treated as a valid response.
26
27 build_opener -- Function that creates a new OpenerDirector instance.
28 Will install the default handlers. Accepts one or more Handlers as
29 arguments, either instances or Handler classes that it will
30 instantiate. If one of the arguments is a subclass of a default
31 handler, the argument will be installed instead of the default.
32
33 install_opener -- Installs a new opener as the default opener.
34
35 objects of interest:
36
37 OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
38 the Handler classes, while dealing with requests and responses.
39
40 Request -- An object that encapsulates the state of a request. The
41 state can be as simple as the URL. It can also include extra HTTP
42 headers, e.g. a User-Agent.
43
44 BaseHandler --
45
46 internals:
47 BaseHandler and parent
48 _call_chain conventions
49
50 Example usage:
51
52 import urllib.request
53
54 # set up authentication info
55 authinfo = urllib.request.HTTPBasicAuthHandler()
56 authinfo.add_password(realm='PDQ Application',
57 uri='https://mahler:8092/site-updates.py',
58 user='klem',
59 passwd='geheim$parole')
60
61 proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
62
63 # build a new opener that adds authentication and caching FTP handlers
64 opener = urllib.request.build_opener(proxy_support, authinfo,
65 urllib.request.CacheFTPHandler)
66
67 # install it
68 urllib.request.install_opener(opener)
69
70 f = urllib.request.urlopen('http://www.python.org/')
71 """
72
73 # XXX issues:
74 # If an authentication error handler that tries to perform
75 # authentication for some reason but fails, how should the error be
76 # signalled? The client needs to know the HTTP error code. But if
77 # the handler knows what the problem was, e.g., that it didn't know
78 # the hash algorithm requested in the challenge, it would be good to
79 # pass that information along to the client, too.
80 # ftp errors aren't handled cleanly
81 # check digest against correct (i.e. non-apache) implementation
82
83 # Possible extensions:
84 # complex proxies XXX not sure what exactly was meant by this
85 # abstract factory for opener
86
87 from __future__ import absolute_import, division, print_function, unicode_literals
88 from future.builtins import bytes, dict, filter, input, int, map, open, str
89 from future.utils import PY2, PY3, raise_with_traceback
90
91 import base64
92 import bisect
93 import hashlib
94 import array
95
96 from future.backports import email
97 from future.backports.http import client as http_client
98 from .error import URLError, HTTPError, ContentTooShortError
99 from .parse import (
100 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
101 splittype, splithost, splitport, splituser, splitpasswd,
102 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
103 from .response import addinfourl, addclosehook
104
105 import io
106 import os
107 import posixpath
108 import re
109 import socket
110 import sys
111 import time
112 import collections
113 import tempfile
114 import contextlib
115 import warnings
116
117 # check for SSL
118 try:
119 import ssl
120 # Not available in the SSL module in Py2:
121 from ssl import SSLContext
122 except ImportError:
123 _have_ssl = False
124 else:
125 _have_ssl = True
126
127 __all__ = [
128 # Classes
129 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
130 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
131 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
132 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
133 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
134 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
135 'UnknownHandler', 'HTTPErrorProcessor',
136 # Functions
137 'urlopen', 'install_opener', 'build_opener',
138 'pathname2url', 'url2pathname', 'getproxies',
139 # Legacy interface
140 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
141 ]
142
143 # used in User-Agent header sent
144 __version__ = sys.version[:3]
145
146 _opener = None
147 def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs):
148 if 'cadefault' in _3to2kwargs: cadefault = _3to2kwargs['cadefault']; del _3to2kwargs['cadefault']
149 else: cadefault = False
150 if 'capath' in _3to2kwargs: capath = _3to2kwargs['capath']; del _3to2kwargs['capath']
151 else: capath = None
152 if 'cafile' in _3to2kwargs: cafile = _3to2kwargs['cafile']; del _3to2kwargs['cafile']
153 else: cafile = None
154 global _opener
155 if cafile or capath or cadefault:
156 if not _have_ssl:
157 raise ValueError('SSL support not available')
158 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
159 context.options |= ssl.OP_NO_SSLv2
160 context.verify_mode = ssl.CERT_REQUIRED
161 if cafile or capath:
162 context.load_verify_locations(cafile, capath)
163 else:
164 context.set_default_verify_paths()
165 https_handler = HTTPSHandler(context=context, check_hostname=True)
166 opener = build_opener(https_handler)
167 elif _opener is None:
168 _opener = opener = build_opener()
169 else:
170 opener = _opener
171 return opener.open(url, data, timeout)
172
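[Editor's note] Since cafile, capath and cadefault are unpacked from _3to2kwargs above, they are keyword-only arguments in this backport. A minimal usage sketch, assuming the module is importable as future.backports.urllib.request; the CA-bundle path is a hypothetical placeholder:

from future.backports.urllib.request import urlopen

# With cafile set, urlopen() builds a CERT_REQUIRED SSLContext and a
# one-off HTTPSHandler for this call, as shown above.
f = urlopen('https://www.python.org/', cafile='/etc/ssl/certs/ca-bundle.crt')
try:
    print(f.read(100))
finally:
    f.close()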
173 def install_opener(opener):
174 global _opener
175 _opener = opener
176
177 _url_tempfiles = []
178 def urlretrieve(url, filename=None, reporthook=None, data=None):
179 """
180 Retrieve a URL into a temporary location on disk.
181
182 Requires a URL argument. If a filename is passed, it is used as
183 the temporary file location. The reporthook argument should be
184 a callable that accepts a block number, a read size, and the
185 total file size of the URL target. The data argument should be
186 valid URL encoded data.
187
188 If a filename is passed and the URL points to a local resource,
189 the result is a copy from local file to new file.
190
191 Returns a tuple containing the path to the newly created
192 data file as well as the resulting HTTPMessage object.
193 """
194 url_type, path = splittype(url)
195
196 with contextlib.closing(urlopen(url, data)) as fp:
197 headers = fp.info()
198
199 # Just return the local path and the "headers" for file://
200 # URLs. No sense in performing a copy unless requested.
201 if url_type == "file" and not filename:
202 return os.path.normpath(path), headers
203
204 # Handle temporary file setup.
205 if filename:
206 tfp = open(filename, 'wb')
207 else:
208 tfp = tempfile.NamedTemporaryFile(delete=False)
209 filename = tfp.name
210 _url_tempfiles.append(filename)
211
212 with tfp:
213 result = filename, headers
214 bs = 1024*8
215 size = -1
216 read = 0
217 blocknum = 0
218 if "content-length" in headers:
219 size = int(headers["Content-Length"])
220
221 if reporthook:
222 reporthook(blocknum, bs, size)
223
224 while True:
225 block = fp.read(bs)
226 if not block:
227 break
228 read += len(block)
229 tfp.write(block)
230 blocknum += 1
231 if reporthook:
232 reporthook(blocknum, bs, size)
233
234 if size >= 0 and read < size:
235 raise ContentTooShortError(
236 "retrieval incomplete: got only %i out of %i bytes"
237 % (read, size), result)
238
239 return result
240
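[Editor's note] A small sketch of the legacy interface above, assuming network access. The callback signature (block number, block size, total size) follows the reporthook contract in the docstring; size is -1 when the server sends no Content-Length header:

from future.backports.urllib.request import urlretrieve, urlcleanup

def report(blocknum, bs, size):
    # size is -1 when no Content-Length header was received
    if size > 0:
        print('%.0f%% retrieved' % min(100.0, 100.0 * blocknum * bs / size))

filename, headers = urlretrieve('http://www.python.org/', reporthook=report)
print('%s %s' % (filename, headers.get('Content-Type')))
urlcleanup()  # deletes the temporary file recorded in _url_tempfiles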
241 def urlcleanup():
242 for temp_file in _url_tempfiles:
243 try:
244 os.unlink(temp_file)
245 except EnvironmentError:
246 pass
247
248 del _url_tempfiles[:]
249 global _opener
250 if _opener:
251 _opener = None
252
253 if PY3:
254 _cut_port_re = re.compile(r":\d+$", re.ASCII)
255 else:
256 _cut_port_re = re.compile(r":\d+$")
257
258 def request_host(request):
259
260 """Return request-host, as defined by RFC 2965.
261
262 Variation from RFC: returned value is lowercased, for convenient
263 comparison.
264
265 """
266 url = request.full_url
267 host = urlparse(url)[1]
268 if host == "":
269 host = request.get_header("Host", "")
270
271 # remove port, if present
272 host = _cut_port_re.sub("", host, 1)
273 return host.lower()
274
275 class Request(object):
276
277 def __init__(self, url, data=None, headers={},
278 origin_req_host=None, unverifiable=False,
279 method=None):
280 # unwrap('<URL:type://host/path>') --> 'type://host/path'
281 self.full_url = unwrap(url)
282 self.full_url, self.fragment = splittag(self.full_url)
283 self.data = data
284 self.headers = {}
285 self._tunnel_host = None
286 for key, value in headers.items():
287 self.add_header(key, value)
288 self.unredirected_hdrs = {}
289 if origin_req_host is None:
290 origin_req_host = request_host(self)
291 self.origin_req_host = origin_req_host
292 self.unverifiable = unverifiable
293 self.method = method
294 self._parse()
295
296 def _parse(self):
297 self.type, rest = splittype(self.full_url)
298 if self.type is None:
299 raise ValueError("unknown url type: %r" % self.full_url)
300 self.host, self.selector = splithost(rest)
301 if self.host:
302 self.host = unquote(self.host)
303
304 def get_method(self):
305 """Return a string indicating the HTTP request method."""
306 if self.method is not None:
307 return self.method
308 elif self.data is not None:
309 return "POST"
310 else:
311 return "GET"
312
313 def get_full_url(self):
314 if self.fragment:
315 return '%s#%s' % (self.full_url, self.fragment)
316 else:
317 return self.full_url
318
319 # Begin deprecated methods
320
321 def add_data(self, data):
322 msg = "Request.add_data method is deprecated."
323 warnings.warn(msg, DeprecationWarning, stacklevel=1)
324 self.data = data
325
326 def has_data(self):
327 msg = "Request.has_data method is deprecated."
328 warnings.warn(msg, DeprecationWarning, stacklevel=1)
329 return self.data is not None
330
331 def get_data(self):
332 msg = "Request.get_data method is deprecated."
333 warnings.warn(msg, DeprecationWarning, stacklevel=1)
334 return self.data
335
336 def get_type(self):
337 msg = "Request.get_type method is deprecated."
338 warnings.warn(msg, DeprecationWarning, stacklevel=1)
339 return self.type
340
341 def get_host(self):
342 msg = "Request.get_host method is deprecated."
343 warnings.warn(msg, DeprecationWarning, stacklevel=1)
344 return self.host
345
346 def get_selector(self):
347 msg = "Request.get_selector method is deprecated."
348 warnings.warn(msg, DeprecationWarning, stacklevel=1)
349 return self.selector
350
351 def is_unverifiable(self):
352 msg = "Request.is_unverifiable method is deprecated."
353 warnings.warn(msg, DeprecationWarning, stacklevel=1)
354 return self.unverifiable
355
356 def get_origin_req_host(self):
357 msg = "Request.get_origin_req_host method is deprecated."
358 warnings.warn(msg, DeprecationWarning, stacklevel=1)
359 return self.origin_req_host
360
361 # End deprecated methods
362
363 def set_proxy(self, host, type):
364 if self.type == 'https' and not self._tunnel_host:
365 self._tunnel_host = self.host
366 else:
367 self.type = type
368 self.selector = self.full_url
369 self.host = host
370
371 def has_proxy(self):
372 return self.selector == self.full_url
373
374 def add_header(self, key, val):
375 # useful for something like authentication
376 self.headers[key.capitalize()] = val
377
378 def add_unredirected_header(self, key, val):
379 # will not be added to a redirected request
380 self.unredirected_hdrs[key.capitalize()] = val
381
382 def has_header(self, header_name):
383 return (header_name in self.headers or
384 header_name in self.unredirected_hdrs)
385
386 def get_header(self, header_name, default=None):
387 return self.headers.get(
388 header_name,
389 self.unredirected_hdrs.get(header_name, default))
390
391 def header_items(self):
392 hdrs = self.unredirected_hdrs.copy()
393 hdrs.update(self.headers)
394 return list(hdrs.items())
395
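[Editor's note] A short sketch of the Request object above. Note that add_header() stores keys via str.capitalize(), so header names are effectively normalized to forms like 'X-custom' rather than 'X-Custom':

from future.backports.urllib.request import Request

req = Request('http://www.example.com/path?q=1#frag',
              data=b'payload', headers={'X-Custom': 'value'})
print(req.get_method())                 # 'POST', because data is not None
print('%s %s' % (req.type, req.host))   # 'http www.example.com'
print(req.fragment)                     # 'frag', split off by splittag()
print(req.get_header('X-custom'))       # 'value'; keys are .capitalize()d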
396 class OpenerDirector(object):
397 def __init__(self):
398 client_version = "Python-urllib/%s" % __version__
399 self.addheaders = [('User-agent', client_version)]
400 # self.handlers is retained only for backward compatibility
401 self.handlers = []
402 # manage the individual handlers
403 self.handle_open = {}
404 self.handle_error = {}
405 self.process_response = {}
406 self.process_request = {}
407
408 def add_handler(self, handler):
409 if not hasattr(handler, "add_parent"):
410 raise TypeError("expected BaseHandler instance, got %r" %
411 type(handler))
412
413 added = False
414 for meth in dir(handler):
415 if meth in ["redirect_request", "do_open", "proxy_open"]:
416 # oops, coincidental match
417 continue
418
419 i = meth.find("_")
420 protocol = meth[:i]
421 condition = meth[i+1:]
422
423 if condition.startswith("error"):
424 j = condition.find("_") + i + 1
425 kind = meth[j+1:]
426 try:
427 kind = int(kind)
428 except ValueError:
429 pass
430 lookup = self.handle_error.get(protocol, {})
431 self.handle_error[protocol] = lookup
432 elif condition == "open":
433 kind = protocol
434 lookup = self.handle_open
435 elif condition == "response":
436 kind = protocol
437 lookup = self.process_response
438 elif condition == "request":
439 kind = protocol
440 lookup = self.process_request
441 else:
442 continue
443
444 handlers = lookup.setdefault(kind, [])
445 if handlers:
446 bisect.insort(handlers, handler)
447 else:
448 handlers.append(handler)
449 added = True
450
451 if added:
452 bisect.insort(self.handlers, handler)
453 handler.add_parent(self)
454
455 def close(self):
456 # Only exists for backwards compatibility.
457 pass
458
459 def _call_chain(self, chain, kind, meth_name, *args):
460 # Handlers raise an exception if no one else should try to handle
461 # the request, or return None if they can't but another handler
462 # could. Otherwise, they return the response.
463 handlers = chain.get(kind, ())
464 for handler in handlers:
465 func = getattr(handler, meth_name)
466 result = func(*args)
467 if result is not None:
468 return result
469
470 def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
471 """
472 Accept a URL or a Request object
473
474 Python-Future: if the URL is passed as a byte-string, decode it first.
475 """
476 if isinstance(fullurl, bytes):
477 fullurl = fullurl.decode()
478 if isinstance(fullurl, str):
479 req = Request(fullurl, data)
480 else:
481 req = fullurl
482 if data is not None:
483 req.data = data
484
485 req.timeout = timeout
486 protocol = req.type
487
488 # pre-process request
489 meth_name = protocol+"_request"
490 for processor in self.process_request.get(protocol, []):
491 meth = getattr(processor, meth_name)
492 req = meth(req)
493
494 response = self._open(req, data)
495
496 # post-process response
497 meth_name = protocol+"_response"
498 for processor in self.process_response.get(protocol, []):
499 meth = getattr(processor, meth_name)
500 response = meth(req, response)
501
502 return response
503
504 def _open(self, req, data=None):
505 result = self._call_chain(self.handle_open, 'default',
506 'default_open', req)
507 if result:
508 return result
509
510 protocol = req.type
511 result = self._call_chain(self.handle_open, protocol, protocol +
512 '_open', req)
513 if result:
514 return result
515
516 return self._call_chain(self.handle_open, 'unknown',
517 'unknown_open', req)
518
519 def error(self, proto, *args):
520 if proto in ('http', 'https'):
521 # XXX http[s] protocols are special-cased
522 dict = self.handle_error['http'] # https is no different from http
523 proto = args[2] # YUCK!
524 meth_name = 'http_error_%s' % proto
525 http_err = 1
526 orig_args = args
527 else:
528 dict = self.handle_error
529 meth_name = proto + '_error'
530 http_err = 0
531 args = (dict, proto, meth_name) + args
532 result = self._call_chain(*args)
533 if result:
534 return result
535
536 if http_err:
537 args = (dict, 'default', 'http_error_default') + orig_args
538 return self._call_chain(*args)
539
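[Editor's note] add_handler() above dispatches purely on method names of the form <protocol>_open, <protocol>_request, <protocol>_response and <protocol>_error_<code>. A minimal sketch of a handler hooking request pre-processing; the class name and print are illustrative only:

from future.backports.urllib.request import BaseHandler, OpenerDirector

class TracingHandler(BaseHandler):
    def http_request(self, req):
        # Parses as protocol='http', condition='request', so add_handler()
        # files this method under process_request['http'].
        print('about to fetch ' + req.full_url)
        return req

director = OpenerDirector()
director.add_handler(TracingHandler())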
540 # XXX probably also want an abstract factory that knows when it makes
541 # sense to skip a superclass in favor of a subclass and when it might
542 # make sense to include both
543
544 def build_opener(*handlers):
545 """Create an opener object from a list of handlers.
546
547 The opener will use several default handlers, including support
548 for HTTP, FTP and when applicable HTTPS.
549
550 If any of the handlers passed as arguments are subclasses of the
551 default handlers, the default handlers will not be used.
552 """
553 def isclass(obj):
554 return isinstance(obj, type) or hasattr(obj, "__bases__")
555
556 opener = OpenerDirector()
557 default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
558 HTTPDefaultErrorHandler, HTTPRedirectHandler,
559 FTPHandler, FileHandler, HTTPErrorProcessor]
560 if hasattr(http_client, "HTTPSConnection"):
561 default_classes.append(HTTPSHandler)
562 skip = set()
563 for klass in default_classes:
564 for check in handlers:
565 if isclass(check):
566 if issubclass(check, klass):
567 skip.add(klass)
568 elif isinstance(check, klass):
569 skip.add(klass)
570 for klass in skip:
571 default_classes.remove(klass)
572
573 for klass in default_classes:
574 opener.add_handler(klass())
575
576 for h in handlers:
577 if isclass(h):
578 h = h()
579 opener.add_handler(h)
580 return opener
581
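[Editor's note] As the docstring above says, passing a subclass of a default handler suppresses that default. A brief sketch; VerboseHTTPHandler is a hypothetical name:

from future.backports.urllib.request import HTTPHandler, build_opener

class VerboseHTTPHandler(HTTPHandler):
    def http_open(self, req):
        print('opening ' + req.full_url)
        return HTTPHandler.http_open(self, req)

# The stock HTTPHandler lands in `skip` because VerboseHTTPHandler
# subclasses it, so only the verbose variant handles http URLs.
opener = build_opener(VerboseHTTPHandler)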
582 class BaseHandler(object):
583 handler_order = 500
584
585 def add_parent(self, parent):
586 self.parent = parent
587
588 def close(self):
589 # Only exists for backwards compatibility
590 pass
591
592 def __lt__(self, other):
593 if not hasattr(other, "handler_order"):
594 # Try to preserve the old behavior of having custom classes
595 # inserted after default ones (works only for custom user
596 # classes which are not aware of handler_order).
597 return True
598 return self.handler_order < other.handler_order
599
600
601 class HTTPErrorProcessor(BaseHandler):
602 """Process HTTP error responses."""
603 handler_order = 1000 # after all other processing
604
605 def http_response(self, request, response):
606 code, msg, hdrs = response.code, response.msg, response.info()
607
608 # According to RFC 2616, "2xx" code indicates that the client's
609 # request was successfully received, understood, and accepted.
610 if not (200 <= code < 300):
611 response = self.parent.error(
612 'http', request, response, code, msg, hdrs)
613
614 return response
615
616 https_response = http_response
617
618 class HTTPDefaultErrorHandler(BaseHandler):
619 def http_error_default(self, req, fp, code, msg, hdrs):
620 raise HTTPError(req.full_url, code, msg, hdrs, fp)
621
622 class HTTPRedirectHandler(BaseHandler):
623 # maximum number of redirections to any single URL
624 # this is needed because of the state that cookies introduce
625 max_repeats = 4
626 # maximum total number of redirections (regardless of URL) before
627 # assuming we're in a loop
628 max_redirections = 10
629
630 def redirect_request(self, req, fp, code, msg, headers, newurl):
631 """Return a Request or None in response to a redirect.
632
633 This is called by the http_error_30x methods when a
634 redirection response is received. If a redirection should
635 take place, return a new Request to allow http_error_30x to
636 perform the redirect. Otherwise, raise HTTPError if no-one
637 else should try to handle this url. Return None if you can't
638 but another Handler might.
639 """
640 m = req.get_method()
641 if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
642 or code in (301, 302, 303) and m == "POST")):
643 raise HTTPError(req.full_url, code, msg, headers, fp)
644
645 # Strictly (according to RFC 2616), 301 or 302 in response to
646 # a POST MUST NOT cause a redirection without confirmation
647 # from the user (of urllib.request, in this case). In practice,
648 # essentially all clients do redirect in this case, so we do
649 # the same.
650 # be conciliant with URIs containing a space
651 newurl = newurl.replace(' ', '%20')
652 CONTENT_HEADERS = ("content-length", "content-type")
653 newheaders = dict((k, v) for k, v in req.headers.items()
654 if k.lower() not in CONTENT_HEADERS)
655 return Request(newurl,
656 headers=newheaders,
657 origin_req_host=req.origin_req_host,
658 unverifiable=True)
659
660 # Implementation note: To avoid the server sending us into an
661 # infinite loop, the request object needs to track what URLs we
662 # have already seen. Do this by adding a handler-specific
663 # attribute to the Request object.
664 def http_error_302(self, req, fp, code, msg, headers):
665 # Some servers (incorrectly) return multiple Location headers
666 # (so probably same goes for URI). Use first header.
667 if "location" in headers:
668 newurl = headers["location"]
669 elif "uri" in headers:
670 newurl = headers["uri"]
671 else:
672 return
673
674 # fix a possible malformed URL
675 urlparts = urlparse(newurl)
676
677 # For security reasons we don't allow redirection to anything other
678 # than http, https or ftp.
679
680 if urlparts.scheme not in ('http', 'https', 'ftp', ''):
681 raise HTTPError(
682 newurl, code,
683 "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
684 headers, fp)
685
686 if not urlparts.path:
687 urlparts = list(urlparts)
688 urlparts[2] = "/"
689 newurl = urlunparse(urlparts)
690
691 newurl = urljoin(req.full_url, newurl)
692
693 # XXX Probably want to forget about the state of the current
694 # request, although that might interact poorly with other
695 # handlers that also use handler-specific request attributes
696 new = self.redirect_request(req, fp, code, msg, headers, newurl)
697 if new is None:
698 return
699
700 # loop detection
701 # .redirect_dict has a key url if url was previously visited.
702 if hasattr(req, 'redirect_dict'):
703 visited = new.redirect_dict = req.redirect_dict
704 if (visited.get(newurl, 0) >= self.max_repeats or
705 len(visited) >= self.max_redirections):
706 raise HTTPError(req.full_url, code,
707 self.inf_msg + msg, headers, fp)
708 else:
709 visited = new.redirect_dict = req.redirect_dict = {}
710 visited[newurl] = visited.get(newurl, 0) + 1
711
712 # Don't close the fp until we are sure that we won't use it
713 # with HTTPError.
714 fp.read()
715 fp.close()
716
717 return self.parent.open(new, timeout=req.timeout)
718
719 http_error_301 = http_error_303 = http_error_307 = http_error_302
720
721 inf_msg = "The HTTP server returned a redirect error that would " \
722 "lead to an infinite loop.\n" \
723 "The last 30x error message was:\n"
724
725
726 def _parse_proxy(proxy):
727 """Return (scheme, user, password, host/port) given a URL or an authority.
728
729 If a URL is supplied, it must have an authority (host:port) component.
730 According to RFC 3986, having an authority component means the URL must
731 have two slashes after the scheme:
732
733 >>> _parse_proxy('file:/ftp.example.com/')
734 Traceback (most recent call last):
735 ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
736
737 The first three items of the returned tuple may be None.
738
739 Examples of authority parsing:
740
741 >>> _parse_proxy('proxy.example.com')
742 (None, None, None, 'proxy.example.com')
743 >>> _parse_proxy('proxy.example.com:3128')
744 (None, None, None, 'proxy.example.com:3128')
745
746 The authority component may optionally include userinfo (assumed to be
747 username:password):
748
749 >>> _parse_proxy('joe:password@proxy.example.com')
750 (None, 'joe', 'password', 'proxy.example.com')
751 >>> _parse_proxy('joe:password@proxy.example.com:3128')
752 (None, 'joe', 'password', 'proxy.example.com:3128')
753
754 Same examples, but with URLs instead:
755
756 >>> _parse_proxy('http://proxy.example.com/')
757 ('http', None, None, 'proxy.example.com')
758 >>> _parse_proxy('http://proxy.example.com:3128/')
759 ('http', None, None, 'proxy.example.com:3128')
760 >>> _parse_proxy('http://joe:password@proxy.example.com/')
761 ('http', 'joe', 'password', 'proxy.example.com')
762 >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
763 ('http', 'joe', 'password', 'proxy.example.com:3128')
764
765 Everything after the authority is ignored:
766
767 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
768 ('ftp', 'joe', 'password', 'proxy.example.com')
769
770 Test for no trailing '/' case:
771
772 >>> _parse_proxy('http://joe:password@proxy.example.com')
773 ('http', 'joe', 'password', 'proxy.example.com')
774
775 """
776 scheme, r_scheme = splittype(proxy)
777 if not r_scheme.startswith("/"):
778 # authority
779 scheme = None
780 authority = proxy
781 else:
782 # URL
783 if not r_scheme.startswith("//"):
784 raise ValueError("proxy URL with no authority: %r" % proxy)
785 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.2
786 # and 3.3), path is empty or starts with '/'
787 end = r_scheme.find("/", 2)
788 if end == -1:
789 end = None
790 authority = r_scheme[2:end]
791 userinfo, hostport = splituser(authority)
792 if userinfo is not None:
793 user, password = splitpasswd(userinfo)
794 else:
795 user = password = None
796 return scheme, user, password, hostport
797
798 class ProxyHandler(BaseHandler):
799 # Proxies must be in front
800 handler_order = 100
801
802 def __init__(self, proxies=None):
803 if proxies is None:
804 proxies = getproxies()
805 assert hasattr(proxies, 'keys'), "proxies must be a mapping"
806 self.proxies = proxies
807 for type, url in proxies.items():
808 setattr(self, '%s_open' % type,
809 lambda r, proxy=url, type=type, meth=self.proxy_open:
810 meth(r, proxy, type))
811
812 def proxy_open(self, req, proxy, type):
813 orig_type = req.type
814 proxy_type, user, password, hostport = _parse_proxy(proxy)
815 if proxy_type is None:
816 proxy_type = orig_type
817
818 if req.host and proxy_bypass(req.host):
819 return None
820
821 if user and password:
822 user_pass = '%s:%s' % (unquote(user),
823 unquote(password))
824 creds = base64.b64encode(user_pass.encode()).decode("ascii")
825 req.add_header('Proxy-authorization', 'Basic ' + creds)
826 hostport = unquote(hostport)
827 req.set_proxy(hostport, proxy_type)
828 if orig_type == proxy_type or orig_type == 'https':
829 # let other handlers take care of it
830 return None
831 else:
832 # need to start over, because the other handlers don't
833 # grok the proxy's URL type
834 # e.g. if we have a constructor arg proxies like so:
835 # {'http': 'ftp://proxy.example.com'}, we may end up turning
836 # a request for http://acme.example.com/a into one for
837 # ftp://proxy.example.com/a
838 return self.parent.open(req, timeout=req.timeout)
839
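[Editor's note] The constructor above synthesizes one <scheme>_open method per mapping entry via the bound lambda, so a single ProxyHandler can route several schemes. A sketch mirroring the module docstring; the proxy host and credentials are placeholders:

from future.backports.urllib.request import ProxyHandler, build_opener

proxy_support = ProxyHandler({'http': 'http://joe:password@proxy.example.com:3128/'})
# proxy_open() parses the userinfo with _parse_proxy() and attaches a
# Proxy-authorization header before rerouting the request.
opener = build_opener(proxy_support)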
840 class HTTPPasswordMgr(object):
841
842 def __init__(self):
843 self.passwd = {}
844
845 def add_password(self, realm, uri, user, passwd):
846 # uri could be a single URI or a sequence
847 if isinstance(uri, str):
848 uri = [uri]
849 if realm not in self.passwd:
850 self.passwd[realm] = {}
851 for default_port in True, False:
852 reduced_uri = tuple(
853 [self.reduce_uri(u, default_port) for u in uri])
854 self.passwd[realm][reduced_uri] = (user, passwd)
855
856 def find_user_password(self, realm, authuri):
857 domains = self.passwd.get(realm, {})
858 for default_port in True, False:
859 reduced_authuri = self.reduce_uri(authuri, default_port)
860 for uris, authinfo in domains.items():
861 for uri in uris:
862 if self.is_suburi(uri, reduced_authuri):
863 return authinfo
864 return None, None
865
866 def reduce_uri(self, uri, default_port=True):
867 """Accept authority or URI and extract only the authority and path."""
868 # note HTTP URLs do not have a userinfo component
869 parts = urlsplit(uri)
870 if parts[1]:
871 # URI
872 scheme = parts[0]
873 authority = parts[1]
874 path = parts[2] or '/'
875 else:
876 # host or host:port
877 scheme = None
878 authority = uri
879 path = '/'
880 host, port = splitport(authority)
881 if default_port and port is None and scheme is not None:
882 dport = {"http": 80,
883 "https": 443,
884 }.get(scheme)
885 if dport is not None:
886 authority = "%s:%d" % (host, dport)
887 return authority, path
888
889 def is_suburi(self, base, test):
890 """Check if test is below base in a URI tree
891
892 Both args must be URIs in reduced form.
893 """
894 if base == test:
895 return True
896 if base[0] != test[0]:
897 return False
898 common = posixpath.commonprefix((base[1], test[1]))
899 if len(common) == len(base[1]):
900 return True
901 return False
902
903
904 class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
905
906 def find_user_password(self, realm, authuri):
907 user, password = HTTPPasswordMgr.find_user_password(self, realm,
908 authuri)
909 if user is not None:
910 return user, password
911 return HTTPPasswordMgr.find_user_password(self, None, authuri)
912
913
914 class AbstractBasicAuthHandler(object):
915
916 # XXX this allows for multiple auth-schemes, but will stupidly pick
917 # the last one with a realm specified.
918
919 # allow for double- and single-quoted realm values
920 # (single quotes are a violation of the RFC, but appear in the wild)
921 rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
922 'realm=(["\']?)([^"\']*)\\2', re.I)
923
924 # XXX could pre-emptively send auth info already accepted (RFC 2617,
925 # end of section 2, and section 1.2 immediately after "credentials"
926 # production).
927
928 def __init__(self, password_mgr=None):
929 if password_mgr is None:
930 password_mgr = HTTPPasswordMgr()
931 self.passwd = password_mgr
932 self.add_password = self.passwd.add_password
933 self.retried = 0
934
935 def reset_retry_count(self):
936 self.retried = 0
937
938 def http_error_auth_reqed(self, authreq, host, req, headers):
939 # host may be an authority (without userinfo) or a URL with an
940 # authority
941 # XXX could be multiple headers
942 authreq = headers.get(authreq, None)
943
944 if self.retried > 5:
945 # retry sending the username:password 5 times before failing.
946 raise HTTPError(req.get_full_url(), 401, "basic auth failed",
947 headers, None)
948 else:
949 self.retried += 1
950
951 if authreq:
952 scheme = authreq.split()[0]
953 if scheme.lower() != 'basic':
954 raise ValueError("AbstractBasicAuthHandler does not"
955 " support the following scheme: '%s'" %
956 scheme)
957 else:
958 mo = AbstractBasicAuthHandler.rx.search(authreq)
959 if mo:
960 scheme, quote, realm = mo.groups()
961 if quote not in ['"',"'"]:
962 warnings.warn("Basic Auth Realm was unquoted",
963 UserWarning, 2)
964 if scheme.lower() == 'basic':
965 response = self.retry_http_basic_auth(host, req, realm)
966 if response and response.code != 401:
967 self.retried = 0
968 return response
969
970 def retry_http_basic_auth(self, host, req, realm):
971 user, pw = self.passwd.find_user_password(realm, host)
972 if pw is not None:
973 raw = "%s:%s" % (user, pw)
974 auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
975 if req.headers.get(self.auth_header, None) == auth:
976 return None
977 req.add_unredirected_header(self.auth_header, auth)
978 return self.parent.open(req, timeout=req.timeout)
979 else:
980 return None
981
982
983 class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
984
985 auth_header = 'Authorization'
986
987 def http_error_401(self, req, fp, code, msg, headers):
988 url = req.full_url
989 response = self.http_error_auth_reqed('www-authenticate',
990 url, req, headers)
991 self.reset_retry_count()
992 return response
993
994
995 class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
996
997 auth_header = 'Proxy-authorization'
998
999 def http_error_407(self, req, fp, code, msg, headers):
1000 # http_error_auth_reqed requires that there is no userinfo component in
1001 # authority. Assume there isn't one, since urllib.request does not (and
1002 # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
1003 # userinfo.
1004 authority = req.host
1005 response = self.http_error_auth_reqed('proxy-authenticate',
1006 authority, req, headers)
1007 self.reset_retry_count()
1008 return response
1009
1010
1011 # Return n random bytes.
1012 _randombytes = os.urandom
1013
1014
1015 class AbstractDigestAuthHandler(object):
1016 # Digest authentication is specified in RFC 2617.
1017
1018 # XXX The client does not inspect the Authentication-Info header
1019 # in a successful response.
1020
1021 # XXX It should be possible to test this implementation against
1022 # a mock server that just generates a static set of challenges.
1023
1024 # XXX qop="auth-int" support is shaky
1025
1026 def __init__(self, passwd=None):
1027 if passwd is None:
1028 passwd = HTTPPasswordMgr()
1029 self.passwd = passwd
1030 self.add_password = self.passwd.add_password
1031 self.retried = 0
1032 self.nonce_count = 0
1033 self.last_nonce = None
1034
1035 def reset_retry_count(self):
1036 self.retried = 0
1037
1038 def http_error_auth_reqed(self, auth_header, host, req, headers):
1039 authreq = headers.get(auth_header, None)
1040 if self.retried > 5:
1041 # Don't fail endlessly - if we failed once, we'll probably
1042 # fail a second time. Hm. Unless the Password Manager is
1043 # prompting for the information. Crap. This isn't great
1044 # but it's better than the current 'repeat until recursion
1045 # depth exceeded' approach <wink>
1046 raise HTTPError(req.full_url, 401, "digest auth failed",
1047 headers, None)
1048 else:
1049 self.retried += 1
1050 if authreq:
1051 scheme = authreq.split()[0]
1052 if scheme.lower() == 'digest':
1053 return self.retry_http_digest_auth(req, authreq)
1054 elif scheme.lower() != 'basic':
1055 raise ValueError("AbstractDigestAuthHandler does not support"
1056 " the following scheme: '%s'" % scheme)
1057
1058 def retry_http_digest_auth(self, req, auth):
1059 token, challenge = auth.split(' ', 1)
1060 chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
1061 auth = self.get_authorization(req, chal)
1062 if auth:
1063 auth_val = 'Digest %s' % auth
1064 if req.headers.get(self.auth_header, None) == auth_val:
1065 return None
1066 req.add_unredirected_header(self.auth_header, auth_val)
1067 resp = self.parent.open(req, timeout=req.timeout)
1068 return resp
1069
1070 def get_cnonce(self, nonce):
1071 # The cnonce-value is an opaque
1072 # quoted string value provided by the client and used by both client
1073 # and server to avoid chosen plaintext attacks, to provide mutual
1074 # authentication, and to provide some message integrity protection.
1075 # This isn't a fabulous effort, but it's probably Good Enough.
1076 s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
1077 b = s.encode("ascii") + _randombytes(8)
1078 dig = hashlib.sha1(b).hexdigest()
1079 return dig[:16]
1080
1081 def get_authorization(self, req, chal):
1082 try:
1083 realm = chal['realm']
1084 nonce = chal['nonce']
1085 qop = chal.get('qop')
1086 algorithm = chal.get('algorithm', 'MD5')
1087 # mod_digest doesn't send an opaque, even though it isn't
1088 # supposed to be optional
1089 opaque = chal.get('opaque', None)
1090 except KeyError:
1091 return None
1092
1093 H, KD = self.get_algorithm_impls(algorithm)
1094 if H is None:
1095 return None
1096
1097 user, pw = self.passwd.find_user_password(realm, req.full_url)
1098 if user is None:
1099 return None
1100
1101 # XXX not implemented yet
1102 if req.data is not None:
1103 entdig = self.get_entity_digest(req.data, chal)
1104 else:
1105 entdig = None
1106
1107 A1 = "%s:%s:%s" % (user, realm, pw)
1108 A2 = "%s:%s" % (req.get_method(),
1109 # XXX selector: what about proxies and full urls
1110 req.selector)
1111 if qop == 'auth':
1112 if nonce == self.last_nonce:
1113 self.nonce_count += 1
1114 else:
1115 self.nonce_count = 1
1116 self.last_nonce = nonce
1117 ncvalue = '%08x' % self.nonce_count
1118 cnonce = self.get_cnonce(nonce)
1119 noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
1120 respdig = KD(H(A1), noncebit)
1121 elif qop is None:
1122 respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
1123 else:
1124 # XXX handle auth-int.
1125 raise URLError("qop '%s' is not supported." % qop)
1126
1127 # XXX should the partial digests be encoded too?
1128
1129 base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
1130 'response="%s"' % (user, realm, nonce, req.selector,
1131 respdig)
1132 if opaque:
1133 base += ', opaque="%s"' % opaque
1134 if entdig:
1135 base += ', digest="%s"' % entdig
1136 base += ', algorithm="%s"' % algorithm
1137 if qop:
1138 base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
1139 return base
1140
1141 def get_algorithm_impls(self, algorithm):
1142 # lambdas assume digest modules are imported at the top level
1143 if algorithm == 'MD5':
1144 H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
1145 elif algorithm == 'SHA':
1146 H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
1147 # XXX MD5-sess
1148 KD = lambda s, d: H("%s:%s" % (s, d))
1149 return H, KD
1150
1151 def get_entity_digest(self, data, chal):
1152 # XXX not implemented yet
1153 return None
1154
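[Editor's note] For reference, the qop='auth' branch of get_authorization() above reduces to the RFC 2617 response calculation. A worked sketch with hypothetical challenge values:

import hashlib

H = lambda x: hashlib.md5(x.encode('ascii')).hexdigest()
KD = lambda s, d: H('%s:%s' % (s, d))

A1 = 'klem:PDQ Application:geheim$parole'   # user:realm:password
A2 = 'GET:/site-updates.py'                 # method:selector
nonce, nc, cnonce = 'deadbeef', '00000001', '0a4f113b'
# Response digest, matching the noncebit construction above:
print(KD(H(A1), '%s:%s:%s:%s:%s' % (nonce, nc, cnonce, 'auth', H(A2))))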
1155
1156 class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
1157 """An authentication protocol defined by RFC 2069
1158
1159 Digest authentication improves on basic authentication because it
1160 does not transmit passwords in the clear.
1161 """
1162
1163 auth_header = 'Authorization'
1164 handler_order = 490 # before Basic auth
1165
1166 def http_error_401(self, req, fp, code, msg, headers):
1167 host = urlparse(req.full_url)[1]
1168 retry = self.http_error_auth_reqed('www-authenticate',
1169 host, req, headers)
1170 self.reset_retry_count()
1171 return retry
1172
1173
1174 class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
1175
1176 auth_header = 'Proxy-Authorization'
1177 handler_order = 490 # before Basic auth
1178
1179 def http_error_407(self, req, fp, code, msg, headers):
1180 host = req.host
1181 retry = self.http_error_auth_reqed('proxy-authenticate',
1182 host, req, headers)
1183 self.reset_retry_count()
1184 return retry
1185
1186 class AbstractHTTPHandler(BaseHandler):
1187
1188 def __init__(self, debuglevel=0):
1189 self._debuglevel = debuglevel
1190
1191 def set_http_debuglevel(self, level):
1192 self._debuglevel = level
1193
1194 def do_request_(self, request):
1195 host = request.host
1196 if not host:
1197 raise URLError('no host given')
1198
1199 if request.data is not None: # POST
1200 data = request.data
1201 if isinstance(data, str):
1202 msg = "POST data should be bytes or an iterable of bytes. " \
1203 "It cannot be of type str."
1204 raise TypeError(msg)
1205 if not request.has_header('Content-type'):
1206 request.add_unredirected_header(
1207 'Content-type',
1208 'application/x-www-form-urlencoded')
1209 if not request.has_header('Content-length'):
1210 size = None
1211 try:
1212 ### For Python-Future:
1213 if PY2 and isinstance(data, array.array):
1214 # memoryviews of arrays aren't supported
1215 # in Py2.7. (e.g. memoryview(array.array('I',
1216 # [1, 2, 3, 4])) raises a TypeError.)
1217 # So we calculate the size manually instead:
1218 size = len(data) * data.itemsize
1219 ###
1220 else:
1221 mv = memoryview(data)
1222 size = len(mv) * mv.itemsize
1223 except TypeError:
1224 if isinstance(data, collections.Iterable):
1225 raise ValueError("Content-Length should be specified "
1226 "for iterable data of type %r %r" % (type(data),
1227 data))
1228 else:
1229 request.add_unredirected_header(
1230 'Content-length', '%d' % size)
1231
1232 sel_host = host
1233 if request.has_proxy():
1234 scheme, sel = splittype(request.selector)
1235 sel_host, sel_path = splithost(sel)
1236 if not request.has_header('Host'):
1237 request.add_unredirected_header('Host', sel_host)
1238 for name, value in self.parent.addheaders:
1239 name = name.capitalize()
1240 if not request.has_header(name):
1241 request.add_unredirected_header(name, value)
1242
1243 return request
1244
1245 def do_open(self, http_class, req, **http_conn_args):
1246 """Return an HTTPResponse object for the request, using http_class.
1247
1248 http_class must implement the HTTPConnection API from http.client.
1249 """
1250 host = req.host
1251 if not host:
1252 raise URLError('no host given')
1253
1254 # will parse host:port
1255 h = http_class(host, timeout=req.timeout, **http_conn_args)
1256
1257 headers = dict(req.unredirected_hdrs)
1258 headers.update(dict((k, v) for k, v in req.headers.items()
1259 if k not in headers))
1260
1261 # TODO(jhylton): Should this be redesigned to handle
1262 # persistent connections?
1263
1264 # We want to make an HTTP/1.1 request, but the addinfourl
1265 # class isn't prepared to deal with a persistent connection.
1266 # It will try to read all remaining data from the socket,
1267 # which will block while the server waits for the next request.
1268 # So make sure the connection gets closed after the (only)
1269 # request.
1270 headers["Connection"] = "close"
1271 headers = dict((name.title(), val) for name, val in headers.items())
1272
1273 if req._tunnel_host:
1274 tunnel_headers = {}
1275 proxy_auth_hdr = "Proxy-Authorization"
1276 if proxy_auth_hdr in headers:
1277 tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
1278 # Proxy-Authorization should not be sent to origin
1279 # server.
1280 del headers[proxy_auth_hdr]
1281 h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
1282
1283 try:
1284 h.request(req.get_method(), req.selector, req.data, headers)
1285 except socket.error as err: # timeout error
1286 h.close()
1287 raise URLError(err)
1288 else:
1289 r = h.getresponse()
1290 # If the server does not send us a 'Connection: close' header,
1291 # HTTPConnection assumes the socket should be left open. Manually
1292 # mark the socket to be closed when this response object goes away.
1293 if h.sock:
1294 h.sock.close()
1295 h.sock = None
1296
1297
1298 r.url = req.get_full_url()
1299 # This line replaces the .msg attribute of the HTTPResponse
1300 # with .headers, because urllib clients expect the response to
1301 # have the reason in .msg. It would be good to mark this
1302 # attribute as deprecated and get clients to use info() or
1303 # .headers.
1304 r.msg = r.reason
1305 return r
1306
1307
1308 class HTTPHandler(AbstractHTTPHandler):
1309
1310 def http_open(self, req):
1311 return self.do_open(http_client.HTTPConnection, req)
1312
1313 http_request = AbstractHTTPHandler.do_request_
1314
1315 if hasattr(http_client, 'HTTPSConnection'):
1316
1317 class HTTPSHandler(AbstractHTTPHandler):
1318
1319 def __init__(self, debuglevel=0, context=None, check_hostname=None):
1320 AbstractHTTPHandler.__init__(self, debuglevel)
1321 self._context = context
1322 self._check_hostname = check_hostname
1323
1324 def https_open(self, req):
1325 return self.do_open(http_client.HTTPSConnection, req,
1326 context=self._context, check_hostname=self._check_hostname)
1327
1328 https_request = AbstractHTTPHandler.do_request_
1329
1330 __all__.append('HTTPSHandler')
1331
1332 class HTTPCookieProcessor(BaseHandler):
1333 def __init__(self, cookiejar=None):
1334 import future.backports.http.cookiejar as http_cookiejar
1335 if cookiejar is None:
1336 cookiejar = http_cookiejar.CookieJar()
1337 self.cookiejar = cookiejar
1338
1339 def http_request(self, request):
1340 self.cookiejar.add_cookie_header(request)
1341 return request
1342
1343 def http_response(self, request, response):
1344 self.cookiejar.extract_cookies(response, request)
1345 return response
1346
1347 https_request = http_request
1348 https_response = http_response
1349
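[Editor's note] A minimal sketch wiring the processor above into an opener; cookies extracted from each response are replayed on later requests made through the same opener:

from future.backports.http.cookiejar import CookieJar
from future.backports.urllib.request import HTTPCookieProcessor, build_opener

jar = CookieJar()
opener = build_opener(HTTPCookieProcessor(jar))
response = opener.open('http://www.example.com/')
print(len(list(jar)))  # cookies captured by extract_cookies()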
1350 class UnknownHandler(BaseHandler):
1351 def unknown_open(self, req):
1352 type = req.type
1353 raise URLError('unknown url type: %s' % type)
1354
1355 def parse_keqv_list(l):
1356 """Parse list of key=value strings where keys are not duplicated."""
1357 parsed = {}
1358 for elt in l:
1359 k, v = elt.split('=', 1)
1360 if v[0] == '"' and v[-1] == '"':
1361 v = v[1:-1]
1362 parsed[k] = v
1363 return parsed
1364
1365 def parse_http_list(s):
1366 """Parse lists as described by RFC 2068 Section 2.
1367
1368 In particular, parse comma-separated lists where the elements of
1369 the list may include quoted-strings. A quoted-string could
1370 contain a comma. A non-quoted string could have quotes in the
1371 middle. Neither commas nor quotes count if they are escaped.
1372 Only double-quotes count, not single-quotes.
1373 """
1374 res = []
1375 part = ''
1376
1377 escape = quote = False
1378 for cur in s:
1379 if escape:
1380 part += cur
1381 escape = False
1382 continue
1383 if quote:
1384 if cur == '\\':
1385 escape = True
1386 continue
1387 elif cur == '"':
1388 quote = False
1389 part += cur
1390 continue
1391
1392 if cur == ',':
1393 res.append(part)
1394 part = ''
1395 continue
1396
1397 if cur == '"':
1398 quote = True
1399
1400 part += cur
1401
1402 # append last part
1403 if part:
1404 res.append(part)
1405
1406 return [part.strip() for part in res]
1407
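[Editor's note] The two helpers above are used together by retry_http_digest_auth() to decode a challenge. A short sketch with a hypothetical WWW-Authenticate value (taken from the RFC 2617 examples):

challenge = 'realm="testrealm@host.com", nonce="dcd98b7102dd2f0e", qop="auth"'
items = parse_http_list(challenge)
# -> ['realm="testrealm@host.com"', 'nonce="dcd98b7102dd2f0e"', 'qop="auth"']
print(parse_keqv_list(filter(None, items)))
# -> {'realm': 'testrealm@host.com', 'nonce': 'dcd98b7102dd2f0e', 'qop': 'auth'}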
1408 class FileHandler(BaseHandler):
1409 # Use local file or FTP depending on form of URL
1410 def file_open(self, req):
1411 url = req.selector
1412 if url[:2] == '//' and url[2:3] != '/' and (req.host and
1413 req.host != 'localhost'):
1414 if req.host not in self.get_names():
1415 raise URLError("file:// scheme is supported only on localhost")
1416 else:
1417 return self.open_local_file(req)
1418
1419 # names for the localhost
1420 names = None
1421 def get_names(self):
1422 if FileHandler.names is None:
1423 try:
1424 FileHandler.names = tuple(
1425 socket.gethostbyname_ex('localhost')[2] +
1426 socket.gethostbyname_ex(socket.gethostname())[2])
1427 except socket.gaierror:
1428 FileHandler.names = (socket.gethostbyname('localhost'),)
1429 return FileHandler.names
1430
1431 # not entirely sure what the rules are here
1432 def open_local_file(self, req):
1433 import future.backports.email.utils as email_utils
1434 import mimetypes
1435 host = req.host
1436 filename = req.selector
1437 localfile = url2pathname(filename)
1438 try:
1439 stats = os.stat(localfile)
1440 size = stats.st_size
1441 modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
1442 mtype = mimetypes.guess_type(filename)[0]
1443 headers = email.message_from_string(
1444 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
1445 (mtype or 'text/plain', size, modified))
1446 if host:
1447 host, port = splitport(host)
1448 if not host or \
1449 (not port and _safe_gethostbyname(host) in self.get_names()):
1450 if host:
1451 origurl = 'file://' + host + filename
1452 else:
1453 origurl = 'file://' + filename
1454 return addinfourl(open(localfile, 'rb'), headers, origurl)
1455 except OSError as exp:
1456 # users shouldn't expect OSErrors coming from urlopen()
1457 raise URLError(exp)
1458 raise URLError('file not on local host')
1459
1460 def _safe_gethostbyname(host):
1461 try:
1462 return socket.gethostbyname(host)
1463 except socket.gaierror:
1464 return None
1465
1466 class FTPHandler(BaseHandler):
1467 def ftp_open(self, req):
1468 import ftplib
1469 import mimetypes
1470 host = req.host
1471 if not host:
1472 raise URLError('ftp error: no host given')
1473 host, port = splitport(host)
1474 if port is None:
1475 port = ftplib.FTP_PORT
1476 else:
1477 port = int(port)
1478
1479 # username/password handling
1480 user, host = splituser(host)
1481 if user:
1482 user, passwd = splitpasswd(user)
1483 else:
1484 passwd = None
1485 host = unquote(host)
1486 user = user or ''
1487 passwd = passwd or ''
1488
1489 try:
1490 host = socket.gethostbyname(host)
1491 except socket.error as msg:
1492 raise URLError(msg)
1493 path, attrs = splitattr(req.selector)
1494 dirs = path.split('/')
1495 dirs = list(map(unquote, dirs))
1496 dirs, file = dirs[:-1], dirs[-1]
1497 if dirs and not dirs[0]:
1498 dirs = dirs[1:]
1499 try:
1500 fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
1501 type = file and 'I' or 'D'
1502 for attr in attrs:
1503 attr, value = splitvalue(attr)
1504 if attr.lower() == 'type' and \
1505 value in ('a', 'A', 'i', 'I', 'd', 'D'):
1506 type = value.upper()
1507 fp, retrlen = fw.retrfile(file, type)
1508 headers = ""
1509 mtype = mimetypes.guess_type(req.full_url)[0]
1510 if mtype:
1511 headers += "Content-type: %s\n" % mtype
1512 if retrlen is not None and retrlen >= 0:
1513 headers += "Content-length: %d\n" % retrlen
1514 headers = email.message_from_string(headers)
1515 return addinfourl(fp, headers, req.full_url)
1516 except ftplib.all_errors as exp:
1517 exc = URLError('ftp error: %r' % exp)
1518 raise_with_traceback(exc)
1519
1520 def connect_ftp(self, user, passwd, host, port, dirs, timeout):
1521 return ftpwrapper(user, passwd, host, port, dirs, timeout,
1522 persistent=False)
1523
1524 class CacheFTPHandler(FTPHandler):
1525 # XXX would be nice to have pluggable cache strategies
1526 # XXX this stuff is definitely not thread safe
1527 def __init__(self):
1528 self.cache = {}
1529 self.timeout = {}
1530 self.soonest = 0
1531 self.delay = 60
1532 self.max_conns = 16
1533
1534 def setTimeout(self, t):
1535 self.delay = t
1536
1537 def setMaxConns(self, m):
1538 self.max_conns = m
1539
1540 def connect_ftp(self, user, passwd, host, port, dirs, timeout):
1541 key = user, host, port, '/'.join(dirs), timeout
1542 if key in self.cache:
1543 self.timeout[key] = time.time() + self.delay
1544 else:
1545 self.cache[key] = ftpwrapper(user, passwd, host, port,
1546 dirs, timeout)
1547 self.timeout[key] = time.time() + self.delay
1548 self.check_cache()
1549 return self.cache[key]
1550
1551 def check_cache(self):
1552 # first check for old ones
1553 t = time.time()
1554 if self.soonest <= t:
1555 for k, v in list(self.timeout.items()):
1556 if v < t:
1557 self.cache[k].close()
1558 del self.cache[k]
1559 del self.timeout[k]
1560 self.soonest = min(list(self.timeout.values()))
1561
1562 # then check the size
1563 if len(self.cache) == self.max_conns:
1564 for k, v in list(self.timeout.items()):
1565 if v == self.soonest:
1566 del self.cache[k]
1567 del self.timeout[k]
1568 break
1569 self.soonest = min(list(self.timeout.values()))
1570
1571 def clear_cache(self):
1572 for conn in self.cache.values():
1573 conn.close()
1574 self.cache.clear()
1575 self.timeout.clear()
1576
1577
1578 # Code moved from the old urllib module
1579
1580 MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
1581
1582 # Helper for non-unix systems
1583 if os.name == 'nt':
1584 from nturl2path import url2pathname, pathname2url
1585 else:
1586 def url2pathname(pathname):
1587 """OS-specific conversion from a relative URL of the 'file' scheme
1588 to a file system path; not recommended for general use."""
1589 return unquote(pathname)
1590
1591 def pathname2url(pathname):
1592 """OS-specific conversion from a file system path to a relative URL
1593 of the 'file' scheme; not recommended for general use."""
1594 return quote(pathname)
1595
1596 # This really consists of two pieces:
1597 # (1) a class which handles opening of all sorts of URLs
1598 # (plus assorted utilities etc.)
1599 # (2) a set of functions for parsing URLs
1600 # XXX Should these be separated out into different modules?
1601
1602
1603 ftpcache = {}
1604 class URLopener(object):
1605 """Class to open URLs.
1606 This is a class rather than just a subroutine because we may need
1607 more than one set of global protocol-specific options.
1608 Note -- this is a base class for those who don't want the
1609 automatic handling of errors type 302 (relocated) and 401
1610 (authorization needed)."""
1611
1612 __tempfiles = None
1613
1614 version = "Python-urllib/%s" % __version__
1615
1616 # Constructor
1617 def __init__(self, proxies=None, **x509):
1618 msg = "%(class)s style of invoking requests is deprecated. " \
1619 "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
1620 warnings.warn(msg, DeprecationWarning, stacklevel=3)
1621 if proxies is None:
1622 proxies = getproxies()
1623 assert hasattr(proxies, 'keys'), "proxies must be a mapping"
1624 self.proxies = proxies
1625 self.key_file = x509.get('key_file')
1626 self.cert_file = x509.get('cert_file')
1627 self.addheaders = [('User-Agent', self.version)]
1628 self.__tempfiles = []
1629 self.__unlink = os.unlink # See cleanup()
1630 self.tempcache = None
1631 # Undocumented feature: if you assign {} to tempcache,
1632 # it is used to cache files retrieved with
1633 # self.retrieve(). This is not enabled by default
1634 # since it does not work for changing documents (and I
1635 # haven't got the logic to check expiration headers
1636 # yet).
1637 self.ftpcache = ftpcache
1638 # Undocumented feature: you can use a different
1639 # ftp cache by assigning to the .ftpcache member;
1640 # in case you want logically independent URL openers
1641 # XXX This is not threadsafe. Bah.
1642
1643 def __del__(self):
1644 self.close()
1645
1646 def close(self):
1647 self.cleanup()
1648
1649 def cleanup(self):
1650 # This code sometimes runs when the rest of this module
1651 # has already been deleted, so it can't use any globals
1652 # or import anything.
1653 if self.__tempfiles:
1654 for file in self.__tempfiles:
1655 try:
1656 self.__unlink(file)
1657 except OSError:
1658 pass
1659 del self.__tempfiles[:]
1660 if self.tempcache:
1661 self.tempcache.clear()
1662
1663 def addheader(self, *args):
1664 """Add a header to be used by the HTTP interface only
1665 e.g. u.addheader('Accept', 'sound/basic')"""
1666 self.addheaders.append(args)
1667
1668 # External interface
1669 def open(self, fullurl, data=None):
1670 """Use URLopener().open(file) instead of open(file, 'r')."""
1671 fullurl = unwrap(to_bytes(fullurl))
1672 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
1673 if self.tempcache and fullurl in self.tempcache:
1674 filename, headers = self.tempcache[fullurl]
1675 fp = open(filename, 'rb')
1676 return addinfourl(fp, headers, fullurl)
1677 urltype, url = splittype(fullurl)
1678 if not urltype:
1679 urltype = 'file'
1680 if urltype in self.proxies:
1681 proxy = self.proxies[urltype]
1682 urltype, proxyhost = splittype(proxy)
1683 host, selector = splithost(proxyhost)
1684 url = (host, fullurl) # Signal special case to open_*()
1685 else:
1686 proxy = None
1687 name = 'open_' + urltype
1688 self.type = urltype
1689 name = name.replace('-', '_')
1690 if not hasattr(self, name):
1691 if proxy:
1692 return self.open_unknown_proxy(proxy, fullurl, data)
1693 else:
1694 return self.open_unknown(fullurl, data)
1695 try:
1696 if data is None:
1697 return getattr(self, name)(url)
1698 else:
1699 return getattr(self, name)(url, data)
1700 except HTTPError:
1701 raise
1702 except socket.error as msg:
1703 raise_with_traceback(IOError('socket error', msg))
1704
1705 def open_unknown(self, fullurl, data=None):
1706 """Overridable interface to open unknown URL type."""
1707 type, url = splittype(fullurl)
1708 raise IOError('url error', 'unknown url type', type)
1709
1710 def open_unknown_proxy(self, proxy, fullurl, data=None):
1711 """Overridable interface to open unknown URL type."""
1712 type, url = splittype(fullurl)
1713 raise IOError('url error', 'invalid proxy for %s' % type, proxy)
1714
1715 # External interface
1716 def retrieve(self, url, filename=None, reporthook=None, data=None):
1717 """retrieve(url) returns (filename, headers) for a local object
1718 or (tempfilename, headers) for a remote object."""
1719 url = unwrap(to_bytes(url))
1720 if self.tempcache and url in self.tempcache:
1721 return self.tempcache[url]
1722 type, url1 = splittype(url)
1723 if filename is None and (not type or type == 'file'):
1724 try:
1725 fp = self.open_local_file(url1)
1726 hdrs = fp.info()
1727 fp.close()
1728 return url2pathname(splithost(url1)[1]), hdrs
1729 except IOError as msg:
1730 pass
1731 fp = self.open(url, data)
1732 try:
1733 headers = fp.info()
1734 if filename:
1735 tfp = open(filename, 'wb')
1736 else:
1737 import tempfile
1738 garbage, path = splittype(url)
1739 garbage, path = splithost(path or "")
1740 path, garbage = splitquery(path or "")
1741 path, garbage = splitattr(path or "")
1742 suffix = os.path.splitext(path)[1]
1743 (fd, filename) = tempfile.mkstemp(suffix)
1744 self.__tempfiles.append(filename)
1745 tfp = os.fdopen(fd, 'wb')
1746 try:
1747 result = filename, headers
1748 if self.tempcache is not None:
1749 self.tempcache[url] = result
1750 bs = 1024*8
1751 size = -1
1752 read = 0
1753 blocknum = 0
1754 if "content-length" in headers:
1755 size = int(headers["Content-Length"])
1756 if reporthook:
1757 reporthook(blocknum, bs, size)
1758 while 1:
1759 block = fp.read(bs)
1760 if not block:
1761 break
1762 read += len(block)
1763 tfp.write(block)
1764 blocknum += 1
1765 if reporthook:
1766 reporthook(blocknum, bs, size)
1767 finally:
1768 tfp.close()
1769 finally:
1770 fp.close()
1771
1772 # raise exception if actual size does not match content-length header
1773 if size >= 0 and read < size:
1774 raise ContentTooShortError(
1775 "retrieval incomplete: got only %i out of %i bytes"
1776 % (read, size), result)
1777
1778 return result
1779
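# Usage sketch: retrieve() calls the optional reporthook as
# reporthook(blocknum, block_size, total_size), where total_size is -1
# when no Content-Length header is present. A hypothetical progress
# printer (URL illustrative):
#
#     def show_progress(blocknum, bs, size):
#         if size > 0:
#             print('%d%%' % min(100, blocknum * bs * 100 // size))
#
#     filename, headers = URLopener().retrieve(
#         'http://example.com/', reporthook=show_progress)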
1780 # Each method named open_<type> knows how to open that type of URL
1781
1782 def _open_generic_http(self, connection_factory, url, data):
1783 """Make an HTTP connection using connection_class.
1784
1785 This is an internal method that should be called from
1786 open_http() or open_https().
1787
1788 Arguments:
1789 - connection_factory should take a host name and return an
1790 HTTPConnection instance.
1791 - url is the url to retrieve, or a (host, relative-path) pair.
1792 - data is payload for a POST request or None.
1793 """
1794
1795 user_passwd = None
1796 proxy_passwd = None
1797 if isinstance(url, str):
1798 host, selector = splithost(url)
1799 if host:
1800 user_passwd, host = splituser(host)
1801 host = unquote(host)
1802 realhost = host
1803 else:
1804 host, selector = url
1805 # check whether the proxy contains authorization information
1806 proxy_passwd, host = splituser(host)
1807 # now we proceed with the url we want to obtain
1808 urltype, rest = splittype(selector)
1809 url = rest
1810 user_passwd = None
1811 if urltype.lower() != 'http':
1812 realhost = None
1813 else:
1814 realhost, rest = splithost(rest)
1815 if realhost:
1816 user_passwd, realhost = splituser(realhost)
1817 if user_passwd:
1818 selector = "%s://%s%s" % (urltype, realhost, rest)
1819 if proxy_bypass(realhost):
1820 host = realhost
1821
1822 if not host: raise IOError('http error', 'no host given')
1823
1824 if proxy_passwd:
1825 proxy_passwd = unquote(proxy_passwd)
1826 proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
1827 else:
1828 proxy_auth = None
1829
1830 if user_passwd:
1831 user_passwd = unquote(user_passwd)
1832 auth = base64.b64encode(user_passwd.encode()).decode('ascii')
1833 else:
1834 auth = None
1835 http_conn = connection_factory(host)
1836 headers = {}
1837 if proxy_auth:
1838 headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
1839 if auth:
1840 headers["Authorization"] = "Basic %s" % auth
1841 if realhost:
1842 headers["Host"] = realhost
1843
1844 # Add Connection:close as we don't support persistent connections yet.
1845 # This helps in closing the socket and avoiding ResourceWarning
1846
1847 headers["Connection"] = "close"
1848
1849 for header, value in self.addheaders:
1850 headers[header] = value
1851
1852 if data is not None:
1853 headers["Content-Type"] = "application/x-www-form-urlencoded"
1854 http_conn.request("POST", selector, data, headers)
1855 else:
1856 http_conn.request("GET", selector, headers=headers)
1857
1858 try:
1859 response = http_conn.getresponse()
1860 except http_client.BadStatusLine:
1861 # something went wrong with the HTTP status line
1862 raise URLError("http protocol error: bad status line")
1863
1864 # According to RFC 2616, "2xx" code indicates that the client's
1865 # request was successfully received, understood, and accepted.
1866 if 200 <= response.status < 300:
1867 return addinfourl(response, response.msg, "http:" + url,
1868 response.status)
1869 else:
1870 return self.http_error(
1871 url, response.fp,
1872 response.status, response.reason, response.msg, data)
1873
1874 def open_http(self, url, data=None):
1875 """Use HTTP protocol."""
1876 return self._open_generic_http(http_client.HTTPConnection, url, data)
1877
1878 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
1879 """Handle http errors.
1880
1881 Derived class can override this, or provide specific handlers
1882 named http_error_DDD where DDD is the 3-digit error code."""
1883 # First check if there's a specific handler for this error
1884 name = 'http_error_%d' % errcode
1885 if hasattr(self, name):
1886 method = getattr(self, name)
1887 if data is None:
1888 result = method(url, fp, errcode, errmsg, headers)
1889 else:
1890 result = method(url, fp, errcode, errmsg, headers, data)
1891 if result: return result
1892 return self.http_error_default(url, fp, errcode, errmsg, headers)
1893
1894 def http_error_default(self, url, fp, errcode, errmsg, headers):
1895 """Default error handler: close the connection and raise IOError."""
1896 fp.close()
1897 raise HTTPError(url, errcode, errmsg, headers, None)
1898
1899 if _have_ssl:
1900 def _https_connection(self, host):
1901 return http_client.HTTPSConnection(host,
1902 key_file=self.key_file,
1903 cert_file=self.cert_file)
1904
1905 def open_https(self, url, data=None):
1906 """Use HTTPS protocol."""
1907 return self._open_generic_http(self._https_connection, url, data)
1908
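# Sketch: the key_file/cert_file used above come from the x509 keyword
# arguments accepted by the URLopener constructor; a hypothetical
# client-certificate setup (paths illustrative):
#
#     opener = URLopener(key_file='/path/to/client.key',
#                        cert_file='/path/to/client.crt')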
1909 def open_file(self, url):
1910 """Use local file or FTP depending on form of URL."""
1911 if not isinstance(url, str):
1912 raise URLError('file error: proxy support for file protocol currently not implemented')
1913 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
1914 raise ValueError("file:// scheme is supported only on localhost")
1915 else:
1916 return self.open_local_file(url)
1917
1918 def open_local_file(self, url):
1919 """Use local file."""
1920 import future.backports.email.utils as email_utils
1921 import mimetypes
1922 host, file = splithost(url)
1923 localname = url2pathname(file)
1924 try:
1925 stats = os.stat(localname)
1926 except OSError as e:
1927 raise URLError(e.strerror, e.filename)
1928 size = stats.st_size
1929 modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
1930 mtype = mimetypes.guess_type(url)[0]
1931 headers = email.message_from_string(
1932 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
1933 (mtype or 'text/plain', size, modified))
1934 if not host:
1935 urlfile = file
1936 if file[:1] == '/':
1937 urlfile = 'file://' + file
1938 return addinfourl(open(localname, 'rb'), headers, urlfile)
1939 host, port = splitport(host)
1940 if (not port
1941 and socket.gethostbyname(host) in ((localhost(),) + thishost())):
1942 urlfile = file
1943 if file[:1] == '/':
1944 urlfile = 'file://' + file
1945 elif file[:2] == './':
1946 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
1947 return addinfourl(open(localname, 'rb'), headers, urlfile)
1948 raise URLError('local file error: not on local host')
1949
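# Usage sketch: only file URLs with an empty host, 'localhost', or a host
# resolving to this machine are served; e.g. (path illustrative):
#
#     URLopener().open('file:///etc/hosts')           # empty host
#     URLopener().open('file://localhost/etc/hosts')  # named localhost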
1950 def open_ftp(self, url):
1951 """Use FTP protocol."""
1952 if not isinstance(url, str):
1953 raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
1954 import mimetypes
1955 host, path = splithost(url)
1956 if not host: raise URLError('ftp error: no host given')
1957 host, port = splitport(host)
1958 user, host = splituser(host)
1959 if user: user, passwd = splitpasswd(user)
1960 else: passwd = None
1961 host = unquote(host)
1962 user = unquote(user or '')
1963 passwd = unquote(passwd or '')
1964 host = socket.gethostbyname(host)
1965 if not port:
1966 import ftplib
1967 port = ftplib.FTP_PORT
1968 else:
1969 port = int(port)
1970 path, attrs = splitattr(path)
1971 path = unquote(path)
1972 dirs = path.split('/')
1973 dirs, file = dirs[:-1], dirs[-1]
1974 if dirs and not dirs[0]: dirs = dirs[1:]
1975 if dirs and not dirs[0]: dirs[0] = '/'
1976 key = user, host, port, '/'.join(dirs)
1977 # XXX thread unsafe!
1978 if len(self.ftpcache) > MAXFTPCACHE:
1979 # Prune the cache, rather arbitrarily
1980 for k in self.ftpcache.keys():
1981 if k != key:
1982 v = self.ftpcache[k]
1983 del self.ftpcache[k]
1984 v.close()
1985 try:
1986 if key not in self.ftpcache:
1987 self.ftpcache[key] = \
1988 ftpwrapper(user, passwd, host, port, dirs)
1989 if not file: type = 'D'
1990 else: type = 'I'
1991 for attr in attrs:
1992 attr, value = splitvalue(attr)
1993 if attr.lower() == 'type' and \
1994 value in ('a', 'A', 'i', 'I', 'd', 'D'):
1995 type = value.upper()
1996 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
1997 mtype = mimetypes.guess_type("ftp:" + url)[0]
1998 headers = ""
1999 if mtype:
2000 headers += "Content-Type: %s\n" % mtype
2001 if retrlen is not None and retrlen >= 0:
2002 headers += "Content-Length: %d\n" % retrlen
2003 headers = email.message_from_string(headers)
2004 return addinfourl(fp, headers, "ftp:" + url)
2005 except ftperrors() as exp:
2006 raise_with_traceback(URLError('ftp error %r' % exp))
2007
2008 def open_data(self, url, data=None):
2009 """Use "data" URL."""
2010 if not isinstance(url, str):
2011 raise URLError('data error: proxy support for data protocol currently not implemented')
2012 # ignore POSTed data
2013 #
2014 # syntax of data URLs:
2015 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2016 # mediatype := [ type "/" subtype ] *( ";" parameter )
2017 # data := *urlchar
2018 # parameter := attribute "=" value
2019 try:
2020 [type, data] = url.split(',', 1)
2021 except ValueError:
2022 raise IOError('data error', 'bad data URL')
2023 if not type:
2024 type = 'text/plain;charset=US-ASCII'
2025 semi = type.rfind(';')
2026 if semi >= 0 and '=' not in type[semi:]:
2027 encoding = type[semi+1:]
2028 type = type[:semi]
2029 else:
2030 encoding = ''
2031 msg = []
2032 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
2033 time.gmtime(time.time())))
2034 msg.append('Content-type: %s' % type)
2035 if encoding == 'base64':
2036 # XXX is this encoding/decoding ok?
2037 data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
2038 else:
2039 data = unquote(data)
2040 msg.append('Content-Length: %d' % len(data))
2041 msg.append('')
2042 msg.append(data)
2043 msg = '\n'.join(msg)
2044 headers = email.message_from_string(msg)
2045 f = io.StringIO(msg)
2046 #f.fileno = None # needed for addinfourl
2047 return addinfourl(f, headers, url)
2048
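# Usage sketch for RFC 2397 data URLs; both forms are equivalent
# ('aGVsbG8=' is base64 for 'hello'):
#
#     URLopener().open('data:text/plain,hello')
#     URLopener().open('data:text/plain;base64,aGVsbG8=')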
2049
2050 class FancyURLopener(URLopener):
2051 """Derived class with handlers for errors we can handle (perhaps)."""
2052
2053 def __init__(self, *args, **kwargs):
2054 URLopener.__init__(self, *args, **kwargs)
2055 self.auth_cache = {}
2056 self.tries = 0
2057 self.maxtries = 10
2058
2059 def http_error_default(self, url, fp, errcode, errmsg, headers):
2060 """Default error handling -- don't raise an exception."""
2061 return addinfourl(fp, headers, "http:" + url, errcode)
2062
2063 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
2064 """Error 302 -- relocated (temporarily)."""
2065 self.tries += 1
2066 if self.maxtries and self.tries >= self.maxtries:
2067 if hasattr(self, "http_error_500"):
2068 meth = self.http_error_500
2069 else:
2070 meth = self.http_error_default
2071 self.tries = 0
2072 return meth(url, fp, 500,
2073 "Internal Server Error: Redirect Recursion", headers)
2074 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
2075 data)
2076 self.tries = 0
2077 return result
2078
2079 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
2080 if 'location' in headers:
2081 newurl = headers['location']
2082 elif 'uri' in headers:
2083 newurl = headers['uri']
2084 else:
2085 return
2086 fp.close()
2087
2088 # In case the server sent a relative URL, join with original:
2089 newurl = urljoin(self.type + ":" + url, newurl)
2090
2091 urlparts = urlparse(newurl)
2092
2093 # For security reasons, we don't allow redirection to anything other
2094 # than http, https and ftp.
2095
2096 # We are using the newer HTTPError with the older redirect_internal method;
2097 # this older method will be deprecated in 3.3.
2098
2099 if urlparts.scheme not in ('http', 'https', 'ftp', ''):
2100 raise HTTPError(newurl, errcode,
2101 errmsg +
2102 " Redirection to url '%s' is not allowed." % newurl,
2103 headers, fp)
2104
2105 return self.open(newurl)
2106
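# Sketch: the redirect-recursion guard above counts attempts in
# self.tries against self.maxtries (10 by default); the limit is a plain
# instance attribute, so it can be tuned per opener:
#
#     opener = FancyURLopener()
#     opener.maxtries = 20    # value illustrative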
2107 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
2108 """Error 301 -- also relocated (permanently)."""
2109 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2110
2111 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
2112 """Error 303 -- also relocated (essentially identical to 302)."""
2113 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2114
2115 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
2116 """Error 307 -- relocated, but turn POST into error."""
2117 if data is None:
2118 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2119 else:
2120 return self.http_error_default(url, fp, errcode, errmsg, headers)
2121
2122 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
2123 retry=False):
2124 """Error 401 -- authentication required.
2125 This function supports Basic authentication only."""
2126 if 'www-authenticate' not in headers:
2127 URLopener.http_error_default(self, url, fp,
2128 errcode, errmsg, headers)
2129 stuff = headers['www-authenticate']
2130 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2131 if not match:
2132 URLopener.http_error_default(self, url, fp,
2133 errcode, errmsg, headers)
2134 scheme, realm = match.groups()
2135 if scheme.lower() != 'basic':
2136 URLopener.http_error_default(self, url, fp,
2137 errcode, errmsg, headers)
2138 if not retry:
2139 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2140 headers)
2141 name = 'retry_' + self.type + '_basic_auth'
2142 if data is None:
2143 return getattr(self,name)(url, realm)
2144 else:
2145 return getattr(self,name)(url, realm, data)
2146
2147 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
2148 retry=False):
2149 """Error 407 -- proxy authentication required.
2150 This function supports Basic authentication only."""
2151 if 'proxy-authenticate' not in headers:
2152 URLopener.http_error_default(self, url, fp,
2153 errcode, errmsg, headers)
2154 stuff = headers['proxy-authenticate']
2155 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2156 if not match:
2157 URLopener.http_error_default(self, url, fp,
2158 errcode, errmsg, headers)
2159 scheme, realm = match.groups()
2160 if scheme.lower() != 'basic':
2161 URLopener.http_error_default(self, url, fp,
2162 errcode, errmsg, headers)
2163 if not retry:
2164 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2165 headers)
2166 name = 'retry_proxy_' + self.type + '_basic_auth'
2167 if data is None:
2168 return getattr(self,name)(url, realm)
2169 else:
2170 return getattr(self,name)(url, realm, data)
2171
2172 def retry_proxy_http_basic_auth(self, url, realm, data=None):
2173 host, selector = splithost(url)
2174 newurl = 'http://' + host + selector
2175 proxy = self.proxies['http']
2176 urltype, proxyhost = splittype(proxy)
2177 proxyhost, proxyselector = splithost(proxyhost)
2178 i = proxyhost.find('@') + 1
2179 proxyhost = proxyhost[i:]
2180 user, passwd = self.get_user_passwd(proxyhost, realm, i)
2181 if not (user or passwd): return None
2182 proxyhost = "%s:%s@%s" % (quote(user, safe=''),
2183 quote(passwd, safe=''), proxyhost)
2184 self.proxies['http'] = 'http://' + proxyhost + proxyselector
2185 if data is None:
2186 return self.open(newurl)
2187 else:
2188 return self.open(newurl, data)
2189
2190 def retry_proxy_https_basic_auth(self, url, realm, data=None):
2191 host, selector = splithost(url)
2192 newurl = 'https://' + host + selector
2193 proxy = self.proxies['https']
2194 urltype, proxyhost = splittype(proxy)
2195 proxyhost, proxyselector = splithost(proxyhost)
2196 i = proxyhost.find('@') + 1
2197 proxyhost = proxyhost[i:]
2198 user, passwd = self.get_user_passwd(proxyhost, realm, i)
2199 if not (user or passwd): return None
2200 proxyhost = "%s:%s@%s" % (quote(user, safe=''),
2201 quote(passwd, safe=''), proxyhost)
2202 self.proxies['https'] = 'https://' + proxyhost + proxyselector
2203 if data is None:
2204 return self.open(newurl)
2205 else:
2206 return self.open(newurl, data)
2207
2208 def retry_http_basic_auth(self, url, realm, data=None):
2209 host, selector = splithost(url)
2210 i = host.find('@') + 1
2211 host = host[i:]
2212 user, passwd = self.get_user_passwd(host, realm, i)
2213 if not (user or passwd): return None
2214 host = "%s:%s@%s" % (quote(user, safe=''),
2215 quote(passwd, safe=''), host)
2216 newurl = 'http://' + host + selector
2217 if data is None:
2218 return self.open(newurl)
2219 else:
2220 return self.open(newurl, data)
2221
2222 def retry_https_basic_auth(self, url, realm, data=None):
2223 host, selector = splithost(url)
2224 i = host.find('@') + 1
2225 host = host[i:]
2226 user, passwd = self.get_user_passwd(host, realm, i)
2227 if not (user or passwd): return None
2228 host = "%s:%s@%s" % (quote(user, safe=''),
2229 quote(passwd, safe=''), host)
2230 newurl = 'https://' + host + selector
2231 if data is None:
2232 return self.open(newurl)
2233 else:
2234 return self.open(newurl, data)
2235
2236 def get_user_passwd(self, host, realm, clear_cache=0):
2237 key = realm + '@' + host.lower()
2238 if key in self.auth_cache:
2239 if clear_cache:
2240 del self.auth_cache[key]
2241 else:
2242 return self.auth_cache[key]
2243 user, passwd = self.prompt_user_passwd(host, realm)
2244 if user or passwd: self.auth_cache[key] = (user, passwd)
2245 return user, passwd
2246
2247 def prompt_user_passwd(self, host, realm):
2248 """Override this in a GUI environment!"""
2249 import getpass
2250 try:
2251 user = input("Enter username for %s at %s: " % (realm, host))
2252 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
2253 (user, realm, host))
2254 return user, passwd
2255 except KeyboardInterrupt:
2256 print()
2257 return None, None
2258
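# Override sketch for non-interactive use (credentials illustrative;
# hard-coding real passwords is not recommended):
#
#     class AutoAuthOpener(FancyURLopener):
#         def prompt_user_passwd(self, host, realm):
#             return 'user', 's3cret'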
2259
2260 # Utility functions
2261
2262 _localhost = None
2263 def localhost():
2264 """Return the IP address of the magic hostname 'localhost'."""
2265 global _localhost
2266 if _localhost is None:
2267 _localhost = socket.gethostbyname('localhost')
2268 return _localhost
2269
2270 _thishost = None
2271 def thishost():
2272 """Return the IP addresses of the current host."""
2273 global _thishost
2274 if _thishost is None:
2275 try:
2276 _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
2277 except socket.gaierror:
2278 _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
2279 return _thishost
2280
2281 _ftperrors = None
2282 def ftperrors():
2283 """Return the set of errors raised by the FTP class."""
2284 global _ftperrors
2285 if _ftperrors is None:
2286 import ftplib
2287 _ftperrors = ftplib.all_errors
2288 return _ftperrors
2289
2290 _noheaders = None
2291 def noheaders():
2292 """Return an empty email Message object."""
2293 global _noheaders
2294 if _noheaders is None:
2295 _noheaders = email.message_from_string("")
2296 return _noheaders
2297
2298
2299 # Utility classes
2300
2301 class ftpwrapper(object):
2302 """Class used by open_ftp() for cache of open FTP connections."""
2303
2304 def __init__(self, user, passwd, host, port, dirs, timeout=None,
2305 persistent=True):
2306 self.user = user
2307 self.passwd = passwd
2308 self.host = host
2309 self.port = port
2310 self.dirs = dirs
2311 self.timeout = timeout
2312 self.refcount = 0
2313 self.keepalive = persistent
2314 self.init()
2315
2316 def init(self):
2317 import ftplib
2318 self.busy = 0
2319 self.ftp = ftplib.FTP()
2320 self.ftp.connect(self.host, self.port, self.timeout)
2321 self.ftp.login(self.user, self.passwd)
2322 _target = '/'.join(self.dirs)
2323 self.ftp.cwd(_target)
2324
2325 def retrfile(self, file, type):
2326 import ftplib
2327 self.endtransfer()
2328 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
2329 else: cmd = 'TYPE ' + type; isdir = 0
2330 try:
2331 self.ftp.voidcmd(cmd)
2332 except ftplib.all_errors:
2333 self.init()
2334 self.ftp.voidcmd(cmd)
2335 conn = None
2336 if file and not isdir:
2337 # Try to retrieve as a file
2338 try:
2339 cmd = 'RETR ' + file
2340 conn, retrlen = self.ftp.ntransfercmd(cmd)
2341 except ftplib.error_perm as reason:
2342 if str(reason)[:3] != '550':
2343 raise_with_traceback(URLError('ftp error: %r' % reason))
2344 if not conn:
2345 # Set transfer mode to ASCII!
2346 self.ftp.voidcmd('TYPE A')
2347 # Try a directory listing. Verify that directory exists.
2348 if file:
2349 pwd = self.ftp.pwd()
2350 try:
2351 try:
2352 self.ftp.cwd(file)
2353 except ftplib.error_perm as reason:
2354 ### Was:
2355 # raise URLError('ftp error: %r' % reason) from reason
2356 exc = URLError('ftp error: %r' % reason)
2357 exc.__cause__ = reason
2358 raise exc
2359 finally:
2360 self.ftp.cwd(pwd)
2361 cmd = 'LIST ' + file
2362 else:
2363 cmd = 'LIST'
2364 conn, retrlen = self.ftp.ntransfercmd(cmd)
2365 self.busy = 1
2366
2367 ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
2368 self.refcount += 1
2369 conn.close()
2370 # Pass back both a suitably decorated object and a retrieval length
2371 return (ftpobj, retrlen)
2372
2373 def endtransfer(self):
2374 self.busy = 0
2375
2376 def close(self):
2377 self.keepalive = False
2378 if self.refcount <= 0:
2379 self.real_close()
2380
2381 def file_close(self):
2382 self.endtransfer()
2383 self.refcount -= 1
2384 if self.refcount <= 0 and not self.keepalive:
2385 self.real_close()
2386
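# Lifecycle note: retrfile() wraps the data connection in an addclosehook
# that calls file_close(); the connection is only torn down once every
# returned file object has been closed (refcount back to zero) and
# keepalive has been dropped via close().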
2387 def real_close(self):
2388 self.endtransfer()
2389 try:
2390 self.ftp.close()
2391 except ftperrors():
2392 pass
2393
2394 # Proxy handling
2395 def getproxies_environment():
2396 """Return a dictionary of scheme -> proxy server URL mappings.
2397
2398 Scan the environment for variables named <scheme>_proxy;
2399 this seems to be the standard convention. If you need a
2400 different way, you can pass a proxies dictionary to the
2401 [Fancy]URLopener constructor.
2402
2403 """
2404 proxies = {}
2405 for name, value in os.environ.items():
2406 name = name.lower()
2407 if value and name[-6:] == '_proxy':
2408 proxies[name[:-6]] = value
2409 return proxies
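# Usage sketch (environment value illustrative):
#
#     os.environ['http_proxy'] = 'http://proxy.example:3128'
#     getproxies_environment()  # result includes
#                               # {'http': 'http://proxy.example:3128'}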
2410
2411 def proxy_bypass_environment(host):
2412 """Test if proxies should not be used for a particular host.
2413
2414 Checks the environment for a variable named no_proxy, which should
2415 be a list of DNS suffixes separated by commas, or '*' for all hosts.
2416 """
2417 no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
2418 # '*' is special case for always bypass
2419 if no_proxy == '*':
2420 return 1
2421 # strip port off host
2422 hostonly, port = splitport(host)
2423 # check if the host ends with any of the DNS suffixes
2424 no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
2425 for name in no_proxy_list:
2426 if name and (hostonly.endswith(name) or host.endswith(name)):
2427 return 1
2428 # otherwise, don't bypass
2429 return 0
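# Usage sketch (environment value illustrative):
#
#     os.environ['no_proxy'] = 'example.com,10.0.0.1'
#     proxy_bypass_environment('www.example.com')   # -> 1 (suffix match)
#     proxy_bypass_environment('unrelated.org')     # -> 0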
2430
2431
2432 # This code tests an OS X-specific data structure but is testable on all
2433 # platforms
2434 def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2435 """
2436 Return True iff this host shouldn't be accessed using a proxy
2437
2438 This function uses the MacOSX framework SystemConfiguration
2439 to fetch the proxy information.
2440
2441 proxy_settings come from _scproxy._get_proxy_settings or can be mocked, e.g.:
2442 { 'exclude_simple': bool,
2443 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2444 }
2445 """
2446 from fnmatch import fnmatch
2447
2448 hostonly, port = splitport(host)
2449
2450 def ip2num(ipAddr):
2451 parts = ipAddr.split('.')
2452 parts = list(map(int, parts))
2453 if len(parts) != 4:
2454 parts = (parts + [0, 0, 0, 0])[:4]
2455 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2456
2457 # Check for simple host names:
2458 if '.' not in host:
2459 if proxy_settings['exclude_simple']:
2460 return True
2461
2462 hostIP = None
2463
2464 for value in proxy_settings.get('exceptions', ()):
2465 # Items in the list are strings like these: *.local, 169.254/16
2466 if not value: continue
2467
2468 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2469 if m is not None:
2470 if hostIP is None:
2471 try:
2472 hostIP = socket.gethostbyname(hostonly)
2473 hostIP = ip2num(hostIP)
2474 except socket.error:
2475 continue
2476
2477 base = ip2num(m.group(1))
2478 mask = m.group(2)
2479 if mask is None:
2480 mask = 8 * (m.group(1).count('.') + 1)
2481 else:
2482 mask = int(mask[1:])
2483 mask = 32 - mask
2484
2485 if (hostIP >> mask) == (base >> mask):
2486 return True
2487
2488 elif fnmatch(host, value):
2489 return True
2490
2491 return False
2492
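# Testing sketch: as the docstring notes, the settings dict can be
# handcrafted (values illustrative):
#
#     settings = {'exclude_simple': True,
#                 'exceptions': ['*.local', '169.254/16']}
#     _proxy_bypass_macosx_sysconf('myhost', settings)     # True: no dot
#     _proxy_bypass_macosx_sysconf('foo.local', settings)  # True: glob match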
2493
2494 if sys.platform == 'darwin':
2495 from _scproxy import _get_proxy_settings, _get_proxies
2496
2497 def proxy_bypass_macosx_sysconf(host):
2498 proxy_settings = _get_proxy_settings()
2499 return _proxy_bypass_macosx_sysconf(host, proxy_settings)
2500
2501 def getproxies_macosx_sysconf():
2502 """Return a dictionary of scheme -> proxy server URL mappings.
2503
2504 This function uses the MacOSX framework SystemConfiguration
2505 to fetch the proxy information.
2506 """
2507 return _get_proxies()
2508
2509
2510
2511 def proxy_bypass(host):
2512 if getproxies_environment():
2513 return proxy_bypass_environment(host)
2514 else:
2515 return proxy_bypass_macosx_sysconf(host)
2516
2517 def getproxies():
2518 return getproxies_environment() or getproxies_macosx_sysconf()
2519
2520
2521 elif os.name == 'nt':
2522 def getproxies_registry():
2523 """Return a dictionary of scheme -> proxy server URL mappings.
2524
2525 Win32 uses the registry to store proxies.
2526
2527 """
2528 proxies = {}
2529 try:
2530 import winreg
2531 except ImportError:
2532 # Std module, so should be around - but you never know!
2533 return proxies
2534 try:
2535 internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
2536 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
2537 proxyEnable = winreg.QueryValueEx(internetSettings,
2538 'ProxyEnable')[0]
2539 if proxyEnable:
2540 # Returned as Unicode, but causes problems if not converted to ASCII
2541 proxyServer = str(winreg.QueryValueEx(internetSettings,
2542 'ProxyServer')[0])
2543 if '=' in proxyServer:
2544 # Per-protocol settings
2545 for p in proxyServer.split(';'):
2546 protocol, address = p.split('=', 1)
2547 # See if address has a type:// prefix
2548 if not re.match('^([^/:]+)://', address):
2549 address = '%s://%s' % (protocol, address)
2550 proxies[protocol] = address
2551 else:
2552 # Use one setting for all protocols
2553 if proxyServer[:5] == 'http:':
2554 proxies['http'] = proxyServer
2555 else:
2556 proxies['http'] = 'http://%s' % proxyServer
2557 proxies['https'] = 'https://%s' % proxyServer
2558 proxies['ftp'] = 'ftp://%s' % proxyServer
2559 internetSettings.Close()
2560 except (WindowsError, ValueError, TypeError):
2561 # Either registry key not found etc, or the value in an
2562 # unexpected format.
2563 # proxies already set up to be empty so nothing to do
2564 pass
2565 return proxies
2566
2567 def getproxies():
2568 """Return a dictionary of scheme -> proxy server URL mappings.
2569
2570 Returns settings gathered from the environment, if specified,
2571 or the registry.
2572
2573 """
2574 return getproxies_environment() or getproxies_registry()
2575
2576 def proxy_bypass_registry(host):
2577 try:
2578 import winreg
2579 except ImportError:
2580 # Std modules, so should be around - but you never know!
2581 return 0
2582 try:
2583 internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
2584 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
2585 proxyEnable = winreg.QueryValueEx(internetSettings,
2586 'ProxyEnable')[0]
2587 proxyOverride = str(winreg.QueryValueEx(internetSettings,
2588 'ProxyOverride')[0])
2589 # ^^^^ Returned as Unicode, but causes problems if not converted to ASCII
2590 except WindowsError:
2591 return 0
2592 if not proxyEnable or not proxyOverride:
2593 return 0
2594 # try to make a host list from name and IP address.
2595 rawHost, port = splitport(host)
2596 host = [rawHost]
2597 try:
2598 addr = socket.gethostbyname(rawHost)
2599 if addr != rawHost:
2600 host.append(addr)
2601 except socket.error:
2602 pass
2603 try:
2604 fqdn = socket.getfqdn(rawHost)
2605 if fqdn != rawHost:
2606 host.append(fqdn)
2607 except socket.error:
2608 pass
2609 # make a check value list from the registry entry: replace the
2610 # '<local>' string by the localhost entry and the corresponding
2611 # canonical entry.
2612 proxyOverride = proxyOverride.split(';')
2613 # now check if we match one of the registry values.
2614 for test in proxyOverride:
2615 if test == '<local>':
2616 if '.' not in rawHost:
2617 return 1
2618 test = test.replace(".", r"\.") # mask dots
2619 test = test.replace("*", r".*") # change glob sequence
2620 test = test.replace("?", r".") # change glob char
2621 for val in host:
2622 if re.match(test, val, re.I):
2623 return 1
2624 return 0
2625
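# Matching sketch: a ProxyOverride entry such as '*.example.com'
# (illustrative) becomes the regex '.*\.example\.com' and is matched
# case-insensitively against the raw host, its resolved address and its
# FQDN; the special '<local>' entry bypasses the proxy for any dotless
# host name.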
2626 def proxy_bypass(host):
2627 """Return a dictionary of scheme -> proxy server URL mappings.
2628
2629 Returns settings gathered from the environment, if specified,
2630 or the registry.
2631
2632 """
2633 if getproxies_environment():
2634 return proxy_bypass_environment(host)
2635 else:
2636 return proxy_bypass_registry(host)
2637
2638 else:
2639 # By default use environment variables
2640 getproxies = getproxies_environment
2641 proxy_bypass = proxy_bypass_environment