OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 | |
3 """ | |
4 requests.utils | |
5 ~~~~~~~~~~~~~~ | |
6 | |
7 This module provides utility functions that are used within Requests | |
8 that are also useful for external consumption. | |
9 | |
10 """ | |
11 | |
12 import cgi | |
13 import codecs | |
14 import collections | |
15 import os | |
16 import platform | |
17 import re | |
18 import sys | |
19 from netrc import netrc, NetrcParseError | |
20 | |
21 from . import __version__ | |
22 from . import certs | |
23 from .compat import parse_http_list as _parse_list_header | |
24 from .compat import (quote, urlparse, bytes, str, OrderedDict, urlunparse, | |
25 is_py2, is_py3, builtin_str, getproxies, proxy_bypass) | |
26 from .cookies import RequestsCookieJar, cookiejar_from_dict | |
27 from .structures import CaseInsensitiveDict | |
28 from .exceptions import MissingSchema, InvalidURL | |
29 | |
# Referencing RequestsCookieJar keeps pyflakes from flagging the import
# as unused (it is re-exported for external consumers).
_hush_pyflakes = (RequestsCookieJar,)

# Candidate netrc file names: '.netrc' (POSIX) and '_netrc' (Windows).
NETRC_FILES = ('.netrc', '_netrc')

# Path to the CA certificate bundle located by the bundled certs module.
DEFAULT_CA_BUNDLE_PATH = certs.where()
35 | |
36 | |
def dict_to_sequence(d):
    """Return ``d`` as an iterable of key/value pairs.

    Mapping-like objects are converted via their ``items()`` method;
    anything else is passed through untouched.
    """
    return d.items() if hasattr(d, 'items') else d
44 | |
45 | |
def super_len(o):
    """Best-effort size of *o*.

    Tries, in order: the ``len()`` protocol, a ``len`` attribute (as on
    some file-like wrappers), and ``os.fstat`` on the object's file
    descriptor.  Returns ``None`` when none of these apply.
    """
    if hasattr(o, '__len__'):
        return len(o)

    if hasattr(o, 'len'):
        return o.len

    if hasattr(o, 'fileno'):
        # A real file object: ask the OS for its on-disk size.
        return os.fstat(o.fileno()).st_size

    return None
53 | |
54 | |
def get_netrc_auth(url):
    """Look up a ``(login, password)`` pair for *url*'s host in netrc.

    Returns ``None`` when no netrc file exists, the host has no entry,
    or the file cannot be read or parsed.
    """
    try:
        # First netrc-style file that exists in the home directory wins.
        netrc_path = None
        for fname in NETRC_FILES:
            candidate = os.path.expanduser('~/{0}'.format(fname))
            if netrc_path is None and os.path.exists(candidate):
                netrc_path = candidate

        # Abort early if there isn't one.
        if netrc_path is None:
            return None

        # netrc keys entries by bare host name, so drop any port number.
        host = urlparse(url).netloc.split(':')[0]

        try:
            auth = netrc(netrc_path).authenticators(host)
            if auth:
                # authenticators() -> (login, account, password); prefer
                # login, falling back to account when login is empty.
                login = auth[0] if auth[0] else auth[1]
                return (login, auth[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading
            # the file, we'll just skip netrc auth.
            pass

    # AppEngine hackiness: home-dir/netrc access can blow up there.
    except (ImportError, AttributeError):
        pass
89 | |
90 | |
def guess_filename(obj):
    """Return the base filename of a file-like object, or ``None``.

    Uses the object's ``name`` attribute, ignoring pseudo-names such as
    ``'<stdin>'``.  Non-subscriptable names (e.g. the integer file
    descriptor exposed by fd-backed file objects) are also ignored
    instead of raising ``TypeError`` as the old code did.
    """
    name = getattr(obj, 'name', None)
    if not name:
        return None
    try:
        # Pseudo files advertise themselves as '<something>'.
        if name[0] != '<' and name[-1] != '>':
            return os.path.basename(name)
    except TypeError:
        # ``name`` is not string-like -- there is no usable filename.
        pass
    return None
96 | |
97 | |
def from_key_val_list(value):
    """Convert *value* into an :class:`OrderedDict` of key/value pairs.

    ``None`` passes through unchanged; scalar types that cannot
    represent 2-tuples raise :exc:`ValueError`.

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: need more than 1 value to unpack
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])
    """
    if value is None:
        return None

    # Strings/bytes/numbers iterate (or fail) in surprising ways; reject
    # them up front with a clear message.
    scalar_types = (str, bytes, bool, int)
    if isinstance(value, scalar_types):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)
119 | |
120 | |
def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples.
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    # Duck-type mappings via their items() method (the same test
    # dict_to_sequence uses) instead of isinstance(value,
    # collections.Mapping): the ABC aliases in ``collections`` are
    # deprecated and removed in Python 3.10+.
    if hasattr(value, 'items'):
        value = value.items()

    return list(value)
144 | |
145 | |
# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse a comma-separated header list per RFC 2068 Section 2.

    Elements may be quoted-strings (which may themselves contain
    commas); surrounding quotes are stripped and their contents
    unquoted.  Unlike :func:`parse_set_header`, duplicates and case are
    preserved.

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    """
    items = []
    for element in _parse_list_header(value):
        quoted = element[:1] == '"' and element[-1:] == '"'
        items.append(unquote_header_value(element[1:-1]) if quoted else element)
    return items
175 | |
176 | |
# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse an RFC 2068 list of ``key=value`` pairs into a dict.

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    Keys without a value map to ``None``:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    """
    pairs = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            pairs[item] = None
            continue
        key, _, val = item.partition('=')
        if val[:1] == '"' and val[-1:] == '"':
            val = unquote_header_value(val[1:-1])
        pairs[key] = val
    return pairs
209 | |
210 | |
# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquote a header value (reversal of :func:`quote_header_value`).

    This mimics what browsers actually send rather than the RFC: IE,
    for instance, uploads files with ``"C:\foo\bar.txt"`` as the
    filename.

    :param value: the header value to unquote.
    :param is_filename: treat the value as a filename (keeps UNC paths
        intact).
    """
    if not (value and value[0] == value[-1] == '"'):
        return value

    # Strip the surrounding quotes.
    value = value[1:-1]

    # A UNC filename (\\server\share) must keep its leading double
    # backslash: the un-escaping below would collapse it and break
    # _fix_ie_filename().  See #458.
    if is_filename and value[:2] == '\\\\':
        return value

    return value.replace('\\\\', '\\').replace('\\"', '"')
234 | |
235 | |
def dict_from_cookiejar(cj):
    """Return a name -> value dict of the cookies in *cj*.

    :param cj: CookieJar object to extract cookies from.
    """
    return dict((cookie.name, cookie.value) for cookie in cj)
248 | |
249 | |
def add_dict_to_cookiejar(cj, cookie_dict):
    """Insert the key/value pairs of *cookie_dict* into *cj* as cookies.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :return: the updated CookieJar.
    """
    cj.update(cookiejar_from_dict(cookie_dict))
    return cj
260 | |
261 | |
def get_encodings_from_content(content):
    """Return all encodings declared inside an HTML/XML document.

    Looks at ``<meta charset=...>``, ``<meta ... content=...charset=...>``
    pragmas and the XML declaration, in that order.

    :param content: bytestring to extract encodings from.
    """
    finders = (
        re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I),
        re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I),
        re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]'),
    )
    found = []
    for finder in finders:
        found.extend(finder.findall(content))
    return found
275 | |
276 | |
def get_encoding_from_headers(headers):
    """Return the encoding declared by an HTTP header dict, or ``None``.

    :param headers: dictionary to extract encoding from.
    """
    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    # RFC 2616 default for text/* bodies with no declared charset.
    if 'text' in content_type:
        return 'ISO-8859-1'

    return None


def _parse_content_type_header(header):
    """Split a Content-Type header into ``(content-type, params dict)``.

    Hand-rolled replacement for ``cgi.parse_header``: the ``cgi`` module
    is deprecated since Python 3.11 (PEP 594) and removed in 3.13.
    Parameter keys are lowercased; surrounding quotes and spaces are
    stripped from values; a bare parameter maps to ``True``.
    """
    tokens = header.split(';')
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if param:
            key, value = param, True
            index_of_equals = param.find('=')
            if index_of_equals != -1:
                key = param[:index_of_equals].strip(items_to_strip)
                value = param[index_of_equals + 1:].strip(items_to_strip)
            params_dict[key.lower()] = value
    return content_type, params_dict
295 | |
296 | |
def stream_decode_response_unicode(iterator, r):
    """Incrementally decode the byte chunks of *iterator*.

    Uses the response's declared ``encoding`` with an incremental
    decoder (``errors='replace'``); when the response declares no
    encoding, chunks are passed through untouched.
    """
    encoding = r.encoding

    if encoding is None:
        for chunk in iterator:
            yield chunk
        return

    decoder = codecs.getincrementaldecoder(encoding)(errors='replace')
    for chunk in iterator:
        decoded = decoder.decode(chunk)
        if decoded:
            yield decoded

    # Flush whatever the decoder is still buffering.
    tail = decoder.decode(b'', final=True)
    if tail:
        yield tail
313 | |
314 | |
def iter_slices(string, slice_length):
    """Yield successive *slice_length*-sized pieces of *string*."""
    offset = 0
    total = len(string)
    while offset < total:
        yield string[offset:offset + slice_length]
        offset += slice_length
321 | |
322 | |
def get_unicode_from_response(r):
    """Return the response content decoded to unicode.

    :param r: Response object to get unicode content from.

    Tries the charset from the Content-Type header first; on failure
    (or when none is declared) falls back to decoding with replacement
    characters, returning the raw bytes if even that is impossible.
    """
    tried_encodings = []

    # First try the charset advertised in the Content-Type header.
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back: decode with replacement characters.  If no encoding was
    # found at all, str(..., None, ...) raises TypeError and the raw
    # content is returned unchanged.
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content
354 | |
355 | |
# The unreserved URI characters (RFC 3986, section 2.3): the characters
# that never need percent-escaping in a URI.
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    + "0123456789-._~")
360 | |
361 | |
def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters.  Reserved, illegal and non-ASCII bytes stay encoded.

    :raises InvalidURL: if a percent sign is followed by a two-character
        alphanumeric sequence that is not valid hexadecimal.
    """
    parts = uri.split('%')
    unescaped = [parts[0]]
    for part in parts[1:]:
        hex_pair = part[0:2]
        if len(hex_pair) != 2 or not hex_pair.isalnum():
            # Not a full escape sequence: keep the literal percent sign.
            unescaped.append('%' + part)
            continue
        try:
            char = chr(int(hex_pair, 16))
        except ValueError:
            raise InvalidURL("Invalid percent-escape sequence: '%s'" % hex_pair)
        if char in UNRESERVED_SET:
            unescaped.append(char + part[2:])
        else:
            unescaped.append('%' + part)
    return ''.join(unescaped)
382 | |
383 | |
def requote_uri(uri):
    """Re-quote the given URI so it is fully and consistently quoted.

    Runs an unquote/quote cycle: unreserved characters are unescaped
    first, then only illegal characters are re-quoted (reserved
    characters, unreserved characters and '%' are left alone).
    """
    safe_chars = "!#$%&'()*+,/:;=?@[]~"
    return quote(unquote_unreserved(uri), safe=safe_chars)
394 | |
395 | |
def get_environ_proxies(url):
    """Return the proxy settings from the environment for *url*.

    Honors ``no_proxy``/``NO_PROXY`` and the platform's proxy-bypass
    rules; returns an empty dict when the URL should not be proxied.
    """
    def lookup(name):
        # Environment variables may be spelled lower- or upper-case.
        return os.environ.get(name) or os.environ.get(name.upper())

    netloc = urlparse(url).netloc
    no_proxy = lookup('no_proxy')

    if no_proxy:
        # Match the end of the netloc, both with and without the port.
        bare_host = netloc.split(':')[0]
        for host in no_proxy.replace(' ', '').split(','):
            if netloc.endswith(host) or bare_host.endswith(host):
                # URL is explicitly excluded from proxying.
                return {}

    # Respect the operating system's own proxy-bypass configuration.
    if proxy_bypass(netloc):
        return {}

    # Otherwise hand back whatever proxies the environment defines.
    return getproxies()
426 | |
427 | |
def default_user_agent():
    """Build the default User-Agent string:
    ``python-requests/<version> <impl>/<impl-version> <system>/<release>``.
    """
    implementation = platform.python_implementation()

    if implementation == 'PyPy':
        # PyPy's own version lives in sys.pypy_version_info.
        info = sys.pypy_version_info
        implementation_version = '%s.%s.%s' % (info.major, info.minor, info.micro)
        if info.releaselevel != 'final':
            implementation_version += info.releaselevel
    elif implementation in ('CPython', 'Jython', 'IronPython'):
        # For Jython/IronPython this is a complete guess.
        implementation_version = platform.python_version()
    else:
        implementation_version = 'Unknown'

    try:
        system = platform.system()
        release = platform.release()
    except IOError:
        system = release = 'Unknown'

    return " ".join(['python-requests/%s' % __version__,
                     '%s/%s' % (implementation, implementation_version),
                     '%s/%s' % (system, release)])
457 | |
458 | |
def default_headers():
    """Return the CaseInsensitiveDict of headers sent by default."""
    headers = {
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')),
        'Accept': '*/*',
    }
    return CaseInsensitiveDict(headers)
465 | |
466 | |
def parse_header_links(value):
    """Parse an RFC 5988 ``Link`` header into a list of dicts.

    e.g. ``Link: <http://.../front.jpeg>; rel=front; type="image/jpeg",
    <http://.../back.jpeg>; rel=back;type="image/jpeg"`` becomes
    ``[{'url': ..., 'rel': 'front', 'type': 'image/jpeg'}, ...]``.

    :param value: the Link header value.
    :return: list of dicts, each with at least a ``'url'`` key.
    """
    links = []

    replace_chars = " '\""

    # An empty header yields no links (the old code produced a bogus
    # [{'url': ''}] entry here).
    value = value.strip(replace_chars)
    if not value:
        return links

    # Split on ', <' rather than on every comma so that URLs containing
    # commas don't break the parse.
    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip("<> '\"")}

        for param in params.split(';'):
            try:
                # Avoid shadowing the ``value`` argument (old code did).
                key, param_value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = param_value.strip(replace_chars)

        links.append(link)

    return links
499 | |
500 | |
# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """Guess the Unicode encoding of a JSON byte string.

    JSON always opens with two ASCII characters, so the encoding can be
    deduced from the position and count of null bytes in the first four
    bytes, plus an explicit BOM check.

    :param data: JSON document as bytes.
    :return: codec name usable with ``str(data, codec)``, or ``None``.
    """
    sample = data[:4]
    # BUGFIX: the old check used codecs.BOM32_BE which, despite its
    # name, is a legacy alias for the *UTF-16* big-endian BOM, so a
    # genuine UTF-32-BE BOM was never recognized.
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
534 | |
535 | |
def except_on_missing_scheme(url):
    """Raise :exc:`MissingSchema` if *url* lacks an explicit scheme."""
    parsed = urlparse(url)

    if not parsed.scheme:
        raise MissingSchema('Proxy URLs must have explicit schemes.')
543 | |
544 | |
def get_auth_from_url(url):
    """Extract the (username, password) pair embedded in *url*.

    Returns ``('', '')`` for a falsy url; components absent from the
    URL come back as ``None`` (urlparse's behavior).
    """
    if not url:
        return ('', '')

    parsed = urlparse(url)
    return (parsed.username, parsed.password)
553 | |
554 | |
def to_native_string(string, encoding='ascii'):
    """Return *string* as the interpreter's native ``str`` type.

    Encodes on Python 2 and decodes on Python 3 where necessary;
    assumes ASCII unless told otherwise.
    """
    if isinstance(string, builtin_str):
        return string

    if is_py2:
        return string.encode(encoding)
    return string.decode(encoding)
OLD | NEW |