OLD | NEW |
(Empty) | |
| 1 from __future__ import absolute_import |
| 2 from collections import namedtuple |
| 3 |
| 4 from ..exceptions import LocationParseError |
| 5 |
| 6 |
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']


class Url(namedtuple('Url', url_attrs)):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`. Both the scheme and host are normalized as they are
    both case-insensitive according to RFC 3986.
    """
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        # A non-empty path is always stored with a leading slash.
        if path and not path.startswith('/'):
            path = '/' + path
        # Scheme and host are lower-cased so equal URLs compare equal.
        if scheme:
            scheme = scheme.lower()
        if host:
            host = host.lower()
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                       query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        uri = self.path or '/'

        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """
        Network location including host and port.

        Returns ``None`` when there is no host, even if a port is set, so
        that a missing host is never rendered as the literal text "None".
        """
        if self.host is None:
            return None
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`.
        The returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = ''

        # We use "is not None" because we want things to happen with empty
        # strings (or a 0 port) as well.
        if scheme is not None:
            url += scheme + '://'
        if auth is not None:
            url += auth + '@'
        if host is not None:
            url += host
        if port is not None:
            url += ':' + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += '?' + query
        if fragment is not None:
            url += '#' + fragment

        return url

    def __str__(self):
        return self.url
| 93 |
| 94 |
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Delimiters may be longer than one character; the whole matched delimiter
    is removed from the result. When several delimiters match at the same
    position, the one that comes first in ``delims`` wins.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)
        >>> split_first('http://host/path', ['://'])
        ('http', 'host/path', '://')

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            # This delimiter does not occur in the string at all.
            continue

        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None:
        return s, '', None

    # Skip the full delimiter rather than a single character so that
    # multi-character delimiters do not leak into the second part.
    return s[:min_idx], s[min_idx + len(min_delim):], min_delim
| 126 |
| 127 |
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    :raises LocationParseError: if a non-blank port is not a plain integer,
        or if an IPv6 literal is opened with ``[`` but never closed.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementation does silly things to be optimal
    # on CPython.

    if not url:
        # Empty
        return Url()

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        if ']' not in url:
            # An unterminated literal used to fail the tuple unpacking below
            # with a bare ValueError; report it as a parse error instead.
            # NOTE(review): presumably LocationParseError is a ValueError
            # subclass, so existing handlers keep working - confirm in
            # ..exceptions.
            raise LocationParseError(url)
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if port:
            # If given, ports must be integers. No whitespace, no plus or
            # minus prefixes, no non-integer digits such as ^2 (superscript).
            if not port.isdigit():
                raise LocationParseError(url)
            try:
                port = int(port)
            except ValueError:
                # isdigit() accepts some characters that int() rejects.
                raise LocationParseError(url)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
| 219 |
| 220 |
def get_host(url):
    """
    Deprecated. Use :func:`parse_url` instead.

    Parses ``url`` and returns a ``(scheme, hostname, port)`` tuple, with the
    scheme falling back to ``'http'`` when the url does not provide one.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or 'http'
    return scheme, parsed.hostname, parsed.port
OLD | NEW |