OLD | NEW |
1 from __future__ import generators | 1 from __future__ import generators |
2 """ | 2 """ |
3 httplib2 | 3 httplib2 |
4 | 4 |
5 A caching http interface that supports ETags and gzip | 5 A caching http interface that supports ETags and gzip |
6 to conserve bandwidth. | 6 to conserve bandwidth. |
7 | 7 |
8 Requires Python 2.3 or later | 8 Requires Python 2.3 or later |
9 | 9 |
10 Changelog: | 10 Changelog: |
11 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed. | 11 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed. |
12 | 12 |
13 """ | 13 """ |
14 | 14 |
15 __author__ = "Joe Gregorio (joe@bitworking.org)" | 15 __author__ = "Joe Gregorio (joe@bitworking.org)" |
16 __copyright__ = "Copyright 2006, Joe Gregorio" | 16 __copyright__ = "Copyright 2006, Joe Gregorio" |
17 __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)", | 17 __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)", |
18 "James Antill", | 18 "James Antill", |
19 "Xavier Verges Farrero", | 19 "Xavier Verges Farrero", |
20 "Jonathan Feinberg", | 20 "Jonathan Feinberg", |
21 "Blair Zajac", | 21 "Blair Zajac", |
22 "Sam Ruby", | 22 "Sam Ruby", |
23 "Louis Nyffenegger"] | 23 "Louis Nyffenegger"] |
24 __license__ = "MIT" | 24 __license__ = "MIT" |
25 __version__ = "0.8" | 25 __version__ = "0.9.2" |
26 | 26 |
27 import re | 27 import re |
28 import sys | 28 import sys |
29 import email | 29 import email |
30 import email.Utils | 30 import email.Utils |
31 import email.Message | 31 import email.Message |
32 import email.FeedParser | 32 import email.FeedParser |
33 import StringIO | 33 import StringIO |
34 import gzip | 34 import gzip |
35 import zlib | 35 import zlib |
(...skipping 706 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
742 pass | 742 pass |
743 | 743 |
744 class AllHosts(object): | 744 class AllHosts(object): |
745 pass | 745 pass |
746 | 746 |
747 class ProxyInfo(object): | 747 class ProxyInfo(object): |
748 """Collect information required to use a proxy.""" | 748 """Collect information required to use a proxy.""" |
749 bypass_hosts = () | 749 bypass_hosts = () |
750 | 750 |
751 def __init__(self, proxy_type, proxy_host, proxy_port, | 751 def __init__(self, proxy_type, proxy_host, proxy_port, |
752 proxy_rdns=None, proxy_user=None, proxy_pass=None): | 752 proxy_rdns=True, proxy_user=None, proxy_pass=None): |
753 """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX | 753 """ |
754 constants. For example: | 754 Args: |
| 755 proxy_type: The type of proxy server. This must be set to one of |
| 756 socks.PROXY_TYPE_XXX constants. For example: |
755 | 757 |
756 p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, | 758 p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, |
757 proxy_host='localhost', proxy_port=8000) | 759 proxy_host='localhost', proxy_port=8000) |
| 760 |
| 761 proxy_host: The hostname or IP address of the proxy server. |
| 762 |
| 763 proxy_port: The port that the proxy server is running on. |
| 764 |
| 765 proxy_rdns: If True (default), DNS queries will not be performed |
| 766 locally, and instead, handed to the proxy to resolve. This is useful |
| 767 if the network does not allow resolution of non-local names. In |
| 768 httplib2 0.9 and earlier, this defaulted to False. |
| 769 |
| 770 proxy_user: The username used to authenticate with the proxy server. |
| 771 |
| 772 proxy_pass: The password used to authenticate with the proxy server. |
758 """ | 773 """ |
759 self.proxy_type = proxy_type | 774 self.proxy_type = proxy_type |
760 self.proxy_host = proxy_host | 775 self.proxy_host = proxy_host |
761 self.proxy_port = proxy_port | 776 self.proxy_port = proxy_port |
762 self.proxy_rdns = proxy_rdns | 777 self.proxy_rdns = proxy_rdns |
763 self.proxy_user = proxy_user | 778 self.proxy_user = proxy_user |
764 self.proxy_pass = proxy_pass | 779 self.proxy_pass = proxy_pass |
765 | 780 |
766 def astuple(self): | 781 def astuple(self): |
767 return (self.proxy_type, self.proxy_host, self.proxy_port, | 782 return (self.proxy_type, self.proxy_host, self.proxy_port, |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
864 def connect(self): | 879 def connect(self): |
865 """Connect to the host and port specified in __init__.""" | 880 """Connect to the host and port specified in __init__.""" |
866 # Mostly verbatim from httplib.py. | 881 # Mostly verbatim from httplib.py. |
867 if self.proxy_info and socks is None: | 882 if self.proxy_info and socks is None: |
868 raise ProxiesUnavailableError( | 883 raise ProxiesUnavailableError( |
869 'Proxy support missing but proxy use was requested!') | 884 'Proxy support missing but proxy use was requested!') |
870 msg = "getaddrinfo returns an empty list" | 885 msg = "getaddrinfo returns an empty list" |
871 if self.proxy_info and self.proxy_info.isgood(): | 886 if self.proxy_info and self.proxy_info.isgood(): |
872 use_proxy = True | 887 use_proxy = True |
873 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple() | 888 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple() |
874 else: | 889 |
875 use_proxy = False | |
876 if use_proxy and proxy_rdns: | |
877 host = proxy_host | 890 host = proxy_host |
878 port = proxy_port | 891 port = proxy_port |
879 else: | 892 else: |
| 893 use_proxy = False |
| 894 |
880 host = self.host | 895 host = self.host |
881 port = self.port | 896 port = self.port |
882 | 897 |
883 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): | 898 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): |
884 af, socktype, proto, canonname, sa = res | 899 af, socktype, proto, canonname, sa = res |
885 try: | 900 try: |
886 if use_proxy: | 901 if use_proxy: |
887 self.sock = socks.socksocket(af, socktype, proto) | 902 self.sock = socks.socksocket(af, socktype, proto) |
888 self.sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass) | 903 self.sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass) |
889 else: | 904 else: |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
986 return True | 1001 return True |
987 return False | 1002 return False |
988 | 1003 |
989 def connect(self): | 1004 def connect(self): |
990 "Connect to a host on a given (SSL) port." | 1005 "Connect to a host on a given (SSL) port." |
991 | 1006 |
992 msg = "getaddrinfo returns an empty list" | 1007 msg = "getaddrinfo returns an empty list" |
993 if self.proxy_info and self.proxy_info.isgood(): | 1008 if self.proxy_info and self.proxy_info.isgood(): |
994 use_proxy = True | 1009 use_proxy = True |
995 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple() | 1010 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple() |
996 else: | 1011 |
997 use_proxy = False | |
998 if use_proxy and proxy_rdns: | |
999 host = proxy_host | 1012 host = proxy_host |
1000 port = proxy_port | 1013 port = proxy_port |
1001 else: | 1014 else: |
| 1015 use_proxy = False |
| 1016 |
1002 host = self.host | 1017 host = self.host |
1003 port = self.port | 1018 port = self.port |
1004 | 1019 |
1005 address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) | 1020 address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) |
1006 for family, socktype, proto, canonname, sockaddr in address_info: | 1021 for family, socktype, proto, canonname, sockaddr in address_info: |
1007 try: | 1022 try: |
1008 if use_proxy: | 1023 if use_proxy: |
1009 sock = socks.socksocket(family, socktype, proto) | 1024 sock = socks.socksocket(family, socktype, proto) |
1010 | 1025 |
1011 sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass) | 1026 sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass) |
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1263 raise ServerNotFoundError("Unable to find the server at %s" % conn.host) | 1278 raise ServerNotFoundError("Unable to find the server at %s" % conn.host) |
1264 except ssl_SSLError: | 1279 except ssl_SSLError: |
1265 conn.close() | 1280 conn.close() |
1266 raise | 1281 raise |
1267 except socket.error, e: | 1282 except socket.error, e: |
1268 err = 0 | 1283 err = 0 |
1269 if hasattr(e, 'args'): | 1284 if hasattr(e, 'args'): |
1270 err = getattr(e, 'args')[0] | 1285 err = getattr(e, 'args')[0] |
1271 else: | 1286 else: |
1272 err = e.errno | 1287 err = e.errno |
1273 if err == errno.ECONNREFUSED: # Connection refused | 1288 if err in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES: |
1274 raise | 1289 continue # retry on potentially transient socket errors |
| 1290 raise |
1275 except httplib.HTTPException: | 1291 except httplib.HTTPException: |
1276 # Just because the server closed the connection doesn't apparently mean | 1292 # Just because the server closed the connection doesn't apparently mean |
1277 # that the server didn't send a response. | 1293 # that the server didn't send a response. |
1278 if hasattr(conn, 'sock') and conn.sock is None: | 1294 if hasattr(conn, 'sock') and conn.sock is None: |
1279 if i < RETRIES-1: | 1295 if i < RETRIES-1: |
1280 conn.close() | 1296 conn.close() |
1281 conn.connect() | 1297 conn.connect() |
1282 continue | 1298 continue |
1283 else: | 1299 else: |
1284 conn.close() | 1300 conn.close() |
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1474 authority, timeout=self.timeout, | 1490 authority, timeout=self.timeout, |
1475 proxy_info=proxy_info) | 1491 proxy_info=proxy_info) |
1476 conn.set_debuglevel(debuglevel) | 1492 conn.set_debuglevel(debuglevel) |
1477 | 1493 |
1478 if 'range' not in headers and 'accept-encoding' not in headers: | 1494 if 'range' not in headers and 'accept-encoding' not in headers: |
1479 headers['accept-encoding'] = 'gzip, deflate' | 1495 headers['accept-encoding'] = 'gzip, deflate' |
1480 | 1496 |
1481 info = email.Message.Message() | 1497 info = email.Message.Message() |
1482 cached_value = None | 1498 cached_value = None |
1483 if self.cache: | 1499 if self.cache: |
1484 cachekey = defrag_uri | 1500 cachekey = defrag_uri.encode('utf-8') |
1485 cached_value = self.cache.get(cachekey) | 1501 cached_value = self.cache.get(cachekey) |
1486 if cached_value: | 1502 if cached_value: |
1487 # info = email.message_from_string(cached_value) | 1503 # info = email.message_from_string(cached_value) |
1488 # | 1504 # |
1489 # Need to replace the line above with the kludge below | 1505 # Need to replace the line above with the kludge below |
1490 # to fix the non-existent bug not fixed in this | 1506 # to fix the non-existent bug not fixed in this |
1491 # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html | 1507 # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html |
1492 try: | 1508 try: |
1493 info, content = cached_value.split('\r\n\r\n', 1) | 1509 info, content = cached_value.split('\r\n\r\n', 1) |
1494 feedparser = email.FeedParser.FeedParser() | 1510 feedparser = email.FeedParser.FeedParser() |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1671 self[key.lower()] = value | 1687 self[key.lower()] = value |
1672 self.status = int(self.get('status', self.status)) | 1688 self.status = int(self.get('status', self.status)) |
1673 self.reason = self.get('reason', self.reason) | 1689 self.reason = self.get('reason', self.reason) |
1674 | 1690 |
1675 | 1691 |
1676 def __getattr__(self, name): | 1692 def __getattr__(self, name): |
1677 if name == 'dict': | 1693 if name == 'dict': |
1678 return self | 1694 return self |
1679 else: | 1695 else: |
1680 raise AttributeError, name | 1696 raise AttributeError, name |
OLD | NEW |