Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(194)

Side by Side Diff: third_party/google-endpoints/future/backports/http/cookiejar.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 r"""HTTP cookie handling for web clients.
2
3 This is a backport of the Py3.3 ``http.cookiejar`` module for
4 python-future.
5
6 This module has (now fairly distant) origins in Gisle Aas' Perl module
7 HTTP::Cookies, from the libwww-perl library.
8
9 Docstrings, comments and debug strings in this code refer to the
10 attributes of the HTTP cookie system as cookie-attributes, to distinguish
11 them clearly from Python attributes.
12
13 Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
14 distributed with the Python standard library, but are available from
15 http://wwwsearch.sf.net/):
16
                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar
28
29 """
30
31 from __future__ import unicode_literals
32 from __future__ import print_function
33 from __future__ import division
34 from __future__ import absolute_import
35 from future.builtins import filter, int, map, open, str
36 from future.utils import as_native_str
37
38 __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
39 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
40
41 import copy
42 import datetime
43 import re
44 re.ASCII = 0
45 import time
46 from future.backports.urllib.parse import urlparse, urlsplit, quote
47 from future.backports.http.client import HTTP_PORT
48 try:
49 import threading as _threading
50 except ImportError:
51 import dummy_threading as _threading
52 from calendar import timegm
53
# Flip to True to route this module's diagnostics through the logging module.
debug = False
# Created lazily by _debug() the first time a message is actually emitted.
logger = None

def _debug(*args):
    """Forward *args to the "http.cookiejar" logger when debugging is on.

    Does nothing (and returns None) while the module-level ``debug`` flag is
    false.  Otherwise the logger is created on first use and the result of
    ``logger.debug(*args)`` is returned.
    """
    global logger
    if debug:
        if not logger:
            import logging
            logger = logging.getLogger("http.cookiejar")
        return logger.debug(*args)
    return None
65
66
# String form of the standard HTTP port; request_port() returns it when the
# request host carries no explicit ":port" suffix.
DEFAULT_HTTP_PORT = str(HTTP_PORT)
# Error text for a missing cookie-file name.
# NOTE(review): presumably raised by the FileCookieJar load/save methods,
# which are not visible in this part of the file -- confirm.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
70
def _warn_unhandled_exception():
    """Emit a warning carrying the traceback of the exception being handled.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  Call this from inside
    such a handler so that the swallowed exception is at least reported.
    """
    import traceback
    import warnings
    # format_exc() returns the traceback text directly.  Printing into an
    # io.StringIO instead fails on Python 2, where the traceback module
    # writes native (byte) strings that io.StringIO refuses.
    msg = traceback.format_exc()
    warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2)
80
81
82 # Date/time conversion
83 # -----------------------------------------------------------------------------
84
EPOCH_YEAR = 1970

def _timegm(tt):
    """Range-checked wrapper around calendar.timegm().

    ``tt`` is a time tuple whose first six items are year, month, day of
    month, hour, minute and second.  Returns the seconds since the epoch,
    or None when any of those fields lies outside the accepted range.
    """
    year, month, mday, hour, minute, second = tt[:6]
    fields_ok = (
        year >= EPOCH_YEAR
        and 1 <= month <= 12
        and 1 <= mday <= 31
        and 0 <= hour <= 24
        and 0 <= minute <= 59
        and 0 <= second <= 61
    )
    if not fields_ok:
        return None
    return timegm(tt)
93
# Abbreviated weekday names, indexed like datetime.weekday() (Monday == 0).
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# Abbreviated month names; index 0 is January.
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copy of MONTHS, used for case-insensitive month lookup.
MONTHS_LOWER = [month.lower() for month in MONTHS]
99
def time2isoz(t=None):
    """Format a seconds-since-epoch timestamp as an ISO-like UTC string.

    When ``t`` is omitted (None) the current time is used.

    The result looks like "YYYY-MM-DD hh:mm:ssZ" and is always expressed in
    Universal Time (UTC, aka GMT), for example:

    1994-11-24 08:49:37Z

    """
    moment = (datetime.datetime.utcnow() if t is None
              else datetime.datetime.utcfromtimestamp(t))
    date_part = "%04d-%02d-%02d" % (moment.year, moment.month, moment.day)
    clock_part = "%02d:%02d:%02d" % (moment.hour, moment.minute, moment.second)
    return "%s %sZ" % (date_part, clock_part)
118
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        dt = datetime.datetime.utcnow()
    else:
        dt = datetime.datetime.utcfromtimestamp(t)
    # The weekday is followed by a comma, as the format documented above
    # requires; the comma was previously missing from the output.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
        dt.year, dt.hour, dt.minute, dt.second)
137
138
# Timezone names treated as a zero offset (only membership is tested).
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric zone: optional sign, one or two hour digits, optional minutes.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)

def offset_from_tz_string(tz):
    """Return the UTC offset in seconds described by ``tz``, or None.

    Names listed in UTC_ZONES give 0.  Numeric zones such as "-0800",
    "+01" or "+05:30" are converted to signed seconds.  Anything else is
    unrecognised and yields None.
    """
    if tz in UTC_ZONES:
        return 0
    parsed = TIMEZONE_RE.search(tz)
    if parsed is None:
        return None
    sign, hours, minutes = parsed.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds = seconds + 60 * int(minutes)
    if sign == '-':
        seconds = -seconds
    return seconds
155
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert date/time fields captured by the date regexps to epoch seconds.

    All arguments are strings as captured by the regexps; ``hr``, ``min``,
    ``sec`` and ``tz`` may be None when absent.  ``mon`` is either a month
    name (looked up case-insensitively in MONTHS_LOWER) or a month number.

    Returns the time in seconds since the epoch, or None if the month is
    not recognised, the year is beyond datetime.MAXYEAR, the fields are
    rejected by _timegm(), or the timezone string is not understood.
    """
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    # The loose regexps accept any number of year digits; treat absurd years
    # as unparseable rather than handing them to the time conversion.
    if yr > datetime.MAXYEAR:
        return None
    day = int(day)
    hr = int(hr)
    min = int(min)
    # ISO_DATE_RE admits fractional seconds ("29.5"), which int() alone
    # rejects with ValueError; the fraction is truncated.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
208
# Exact "Wdy, DD Mon YYYY HH:MM:SS GMT" form; groups are day, month name,
# year, hour, minute, second.  Both literals are raw strings so the regex
# escapes are never treated as (invalid) string escapes.
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
# Leading weekday name (abbreviated or spelled out) with optional comma.
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
# Forgiving date pattern; groups are day, month, year, hour, minute,
# second, timezone (the clock and timezone groups may be None).
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X | re.ASCII)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        try:
            mon = MONTHS_LOWER.index(g[1].lower()) + 1
        except ValueError:
            # STRICT_DATE_RE only checks the *shape* of the month ("Foo"
            # passes); an unknown month is a bad date, not a crash.
            return None
        # The seconds group is exactly two digits, so int() keeps the
        # documented integer return type.
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
286
# ISO 8601-style date with optional clock and timezone.  Groups are year,
# month, day, hour, minute, second, timezone, and the trailing minutes part
# of a numeric timezone.  Written as a raw string so the regex escapes are
# never treated as (invalid) string escapes.
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X | re.ASCII)
def iso2time(text):
    """
    Like http2time, but for ISO 8601 style dates.  Accepted forms include:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Returns None when the text does not match.
    """
    parsed = ISO_DATE_RE.search(text.lstrip())
    if parsed is None:
        return None  # bad format

    # XXX the last group is an extra bit of the timezone that is ignored
    # here: is this the right thing to do?
    year, month, mday, hour, minute, second, zone, _ = parsed.groups()
    return _str2time(mday, month, year, hour, minute, second, zone)
331
332
333 # Header parsing
334 # -----------------------------------------------------------------------------
335
def unmatched(match):
    """Return the searched string with the matched span cut out."""
    subject = match.string
    begin, finish = match.span(0)
    return subject[:begin] + subject[finish:]
340
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
# Leading separators that carry no information ("=", ";" and whitespace).
# Compiled once from a raw string instead of being passed as a non-raw
# literal on every junk skip.
HEADER_JUNK_RE = re.compile(r"^[=\s;]*")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, str)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                # Every pattern here is anchored at "^", so whatever was not
                # matched is simply the text after the match.
                text = text[m.end():]
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = text[m.end():]
                    value = HEADER_ESCAPE_RE.sub(r"\1", m.group(1))
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = text[m.end():]
                        value = m.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk = HEADER_JUNK_RE.sub("", text, 1)
                # Progress check: something must have been consumed, or the
                # loop would never end.  (The substitution count cannot be
                # used for this: the pattern also matches the empty string.)
                assert len(non_junk) < len(text), (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
429
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Build one header value from lists of (key, value) pairs.

    This is (almost) the inverse of split_header_words.  Pairs within one
    list are joined with "; ", the lists themselves with ", ".  A value of
    None emits the bare key; any value that is not purely word characters
    is escaped and wrapped in double quotes.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    def render(key, value):
        if value is None:
            return key
        if re.search(r"^\w+$", value) is None:
            # quote the value, escaping embedded quotes and backslashes
            value = '"%s"' % HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
        return "%s=%s" % (key, value)

    rendered = []
    for pairs in lists:
        words = [render(key, value) for key, value in pairs]
        if words:
            rendered.append("; ".join(words))
    return ", ".join(rendered)
455
def strip_quotes(text):
    """Drop one leading and one trailing double quote from text, if present."""
    trimmed = text[1:] if text.startswith('"') else text
    if trimmed.endswith('"'):
        trimmed = trimmed[:-1]
    return trimmed
462
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    Returns a list with one entry per non-empty header; each entry is a list
    of (key, value) pairs.  Known attribute names are lower-cased, the
    "expires" value is converted to seconds since the epoch (None if it
    cannot be parsed), and a ("version", "0") pair is appended when the
    header carried no version attribute.  An attribute given without "="
    has the value None.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    # A bare "version" (no "=") has no value to unquote.
                    if v is not None:
                        v = strip_quotes(v)
                    version_set = True
                if k == "expires":
                    # convert expires date to seconds since epoch; a bare
                    # "expires" (no "=") stays None instead of crashing
                    if v is not None:
                        v = http2time(strip_quotes(v))  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
513
514
# Matches text ending in ".<digits>", which is taken to be an IPv4 address.
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means B is a *suffix* of A.  Merely finding B
    # somewhere inside A is not enough: "www.acme.com.evil.net" must not
    # domain-match ".acme.com".  N is non-empty because A != B here.
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
569
def liberal_is_HDN(text):
    """Return True unless text looks like an IPv4 address.

    A deliberately loose host-domain-name test, used when accepting or
    blocking domains.

    """
    looks_like_ip = IPV4_RE.search(text) is not None
    return not looks_like_ip
579
def user_domain_match(A, B):
    """Domain comparison used for the user's block and allow lists.

    A and B may be host domain names or IP addresses; the comparison is
    case-insensitive.  If either one looks like an IP address only exact
    equality matches.  Otherwise a B starting with "." matches any A ending
    with it, and a B without a leading dot must equal A.

    """
    host = A.lower()
    pattern = B.lower()
    if not liberal_is_HDN(host) or not liberal_is_HDN(pattern):
        # at least one IP address: only identical strings match
        return host == pattern
    if pattern.startswith("."):
        return host.endswith(pattern)
    return host == pattern
599
# Trailing ":<digits>" port suffix of a host string.
cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return the lower-cased request-host of request (see RFC 2965).

    The host comes from the network-location part of the request's full
    URL, falling back to the request's "Host" header when the URL has none.
    A trailing port is removed.  Lower-casing is a variation from the RFC,
    done for convenient comparison.

    """
    netloc = urlparse(request.get_full_url())[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    without_port = cut_port_re.sub("", netloc, 1)
    return without_port.lower()
616
def eff_request_host(request):
    """Return the pair (request-host, effective request-host name).

    Both are as defined by RFC 2965, except that they are lower-cased.  The
    effective name is the request-host with ".local" appended when the host
    contains no dot and does not look like an IPv4 address.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
627
def request_path(request):
    """Return the escaped path component of the request's full URL.

    This is the path of the request-URI as defined by RFC 2965; a leading
    "/" is added when it is missing.
    """
    raw_path = urlsplit(request.get_full_url()).path
    escaped = escape_path(raw_path)
    if escaped.startswith("/"):
        return escaped
    # fix bad RFC 2396 absoluteURI
    return "/" + escaped
637
def request_port(request):
    """Return the port of request.host as a string.

    Everything after the first ":" in the host is taken as the port; if it
    is not numeric, None is returned.  DEFAULT_HTTP_PORT is returned when
    the host contains no ":".
    """
    _, colon, port = request.host.partition(':')
    if not colon:
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
651
652 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
653 # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the matched %xx escape with its hex digits upper-cased."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # The character encoding used to produce any existing %-escapes cannot
    # be known.  One has to be picked in order to escape invalid path
    # characters, and UTF-8 is the choice recommended by the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    quoted = quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
671
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # split at the first dot: h == <A> + "." + <remainder>
    _, dot, remainder = h.partition(".")
    if dot and is_HDN(h) and ("." in remainder or remainder == "local"):
        return "." + remainder
    return h
706
def is_third_party(request):
    """Return True if request goes to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(request_host(request), origin_reach)
722
723
class Cookie(object):
    """HTTP Cookie.

    Represents both Netscape and RFC 2965 cookies.

    The class is deliberately simple: it only stores attributes, and it is
    possible to build instances that do not comply with the cookie
    standards.  CookieJar.make_cookies is the factory for Cookie objects --
    it handles parsing, defaults and normalisation to the representation
    used here.  CookiePolicy decides whether a cookie is accepted from and
    returned to the server.

    The port may be present in the headers but unspecified ("Port" rather
    than "Port=80", for example); in that case port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        # numeric fields are coerced before anything else is checked
        version = None if version is None else int(version)
        expires = None if expires is None else int(expires)
        if port_specified is True and port is None:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value

        self.port = port
        self.port_specified = port_specified

        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Whether the domain cookie-attribute started with a dot has to be
        # remembered in order to follow RFC 2965 (as clarified in draft
        # errata); it is needed for the returned $Domain value.
        self.domain_initial_dot = domain_initial_dot

        self.path = path
        self.path_specified = path_specified

        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # shallow copy, so later changes to the caller's mapping are not seen
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if the non-standard attribute name is present."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard attribute name, or default if absent."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Store value under the non-standard attribute name."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time that is <= now.

        now defaults to the current time; a cookie without an expiry time
        never counts as expired.
        """
        if now is None:
            now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        port_part = "" if self.port is None else ":" + self.port
        limit = self.domain + port_part + self.path
        if self.value is None:
            namevalue = self.name
        else:
            namevalue = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (namevalue, limit)

    @as_native_str()
    def __repr__(self):
        field_names = ("version", "name", "value",
                       "port", "port_specified",
                       "domain", "domain_specified", "domain_initial_dot",
                       "path", "path_specified",
                       "secure", "expires", "discard", "comment", "comment_url",
                       )
        parts = []
        for field in field_names:
            current = getattr(self, field)
            ### Python-Future:
            # Avoid u'...' prefixes for unicode strings:
            if isinstance(current, str):
                current = str(current)
            ###
            parts.append(str("%s=%s") % (field, repr(current)))
        parts.append("rest=%s" % repr(self._rest))
        parts.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(parts)
826
827
class CookiePolicy(object):
    """Interface deciding which cookies are accepted and which are returned.

    A policy may also modify cookies, though this is probably a bad idea.

    DefaultCookiePolicy is the subclass that defines the standard rules for
    Netscape and RFC 2965 cookies -- override that one for a customised
    policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Pre-expired cookies currently never reach this method -- the
        CookieJar class deletes them itself.

        Must be overridden; this base implementation raises
        NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        Must be overridden; this base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if no cookie of the given cookie domain should be
        returned.  This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if no cookie of the given cookie path should be
        returned.  This base implementation always returns True.
        """
        return True
859
860
861 class DefaultCookiePolicy(CookiePolicy):
862 """Implements the standard rules for accepting and returning cookies."""
863
# Bit flags for the strict_ns_domain constructor argument; combine with "|".
# set_ok_domain() tests them with "&" to switch on extra checks for
# Netscape (version 0) cookies.
# DomainStrictNoDots: reject when the request host prefix in front of the
# cookie domain contains a dot.
DomainStrictNoDots = 1
# NOTE(review): DomainStrictNonDomain is not consulted by any method visible
# in this part of the file -- presumably used by the return-side checks;
# confirm.
DomainStrictNonDomain = 2
# DomainRFC2965Match: require the RFC 2965 domain-match for the domain.
DomainRFC2965Match = 4

# No extra checks (the default for strict_ns_domain).
DomainLiberal = 0
DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
870
871 def __init__(self,
872 blocked_domains=None, allowed_domains=None,
873 netscape=True, rfc2965=False,
874 rfc2109_as_netscape=None,
875 hide_cookie2=False,
876 strict_domain=False,
877 strict_rfc2965_unverifiable=True,
878 strict_ns_unverifiable=False,
879 strict_ns_domain=DomainLiberal,
880 strict_ns_set_initial_dollar=False,
881 strict_ns_set_path=False,
882 ):
883 """Constructor arguments should be passed as keyword arguments only."""
884 self.netscape = netscape
885 self.rfc2965 = rfc2965
886 self.rfc2109_as_netscape = rfc2109_as_netscape
887 self.hide_cookie2 = hide_cookie2
888 self.strict_domain = strict_domain
889 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
890 self.strict_ns_unverifiable = strict_ns_unverifiable
891 self.strict_ns_domain = strict_ns_domain
892 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
893 self.strict_ns_set_path = strict_ns_set_path
894
895 if blocked_domains is not None:
896 self._blocked_domains = tuple(blocked_domains)
897 else:
898 self._blocked_domains = ()
899
900 if allowed_domains is not None:
901 allowed_domains = tuple(allowed_domains)
902 self._allowed_domains = allowed_domains
903
904 def blocked_domains(self):
905 """Return the sequence of blocked domains (as a tuple)."""
906 return self._blocked_domains
907 def set_blocked_domains(self, blocked_domains):
908 """Set the sequence of blocked domains."""
909 self._blocked_domains = tuple(blocked_domains)
910
911 def is_blocked(self, domain):
912 for blocked_domain in self._blocked_domains:
913 if user_domain_match(domain, blocked_domain):
914 return True
915 return False
916
917 def allowed_domains(self):
918 """Return None, or the sequence of allowed domains (as a tuple)."""
919 return self._allowed_domains
920 def set_allowed_domains(self, allowed_domains):
921 """Set the sequence of allowed domains, or None."""
922 if allowed_domains is not None:
923 allowed_domains = tuple(allowed_domains)
924 self._allowed_domains = allowed_domains
925
926 def is_not_allowed(self, domain):
927 if self._allowed_domains is None:
928 return False
929 for allowed_domain in self._allowed_domains:
930 if user_domain_match(domain, allowed_domain):
931 return False
932 return True
933
def set_ok(self, cookie, request):
    """
    Run every set_ok_* check in turn; the cookie is accepted only if all of
    them pass.  Checking stops at the first one that fails.

    If you override .set_ok(), be sure to call this method.  If it returns
    false, so should your subclass (assuming your subclass wants to be more
    strict about which cookies to accept).

    """
    _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

    assert cookie.name is not None

    aspects = ("version", "verifiability", "name", "path", "domain", "port")
    return all(getattr(self, "set_ok_" + aspect)(cookie, request)
               for aspect in aspects)
952
953 def set_ok_version(self, cookie, request):
954 if cookie.version is None:
955 # Version is always set to 0 by parse_ns_headers if it's a Netscape
956 # cookie, so this must be an invalid RFC 2965 cookie.
957 _debug(" Set-Cookie2 without version attribute (%s=%s)",
958 cookie.name, cookie.value)
959 return False
960 if cookie.version > 0 and not self.rfc2965:
961 _debug(" RFC 2965 cookies are switched off")
962 return False
963 elif cookie.version == 0 and not self.netscape:
964 _debug(" Netscape cookies are switched off")
965 return False
966 return True
967
968 def set_ok_verifiability(self, cookie, request):
969 if request.unverifiable and is_third_party(request):
970 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
971 _debug(" third-party RFC 2965 cookie during "
972 "unverifiable transaction")
973 return False
974 elif cookie.version == 0 and self.strict_ns_unverifiable:
975 _debug(" third-party Netscape cookie during "
976 "unverifiable transaction")
977 return False
978 return True
979
980 def set_ok_name(self, cookie, request):
981 # Try and stop servers setting V0 cookies designed to hack other
982 # servers that know both V0 and V1 protocols.
983 if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
984 cookie.name.startswith("$")):
985 _debug(" illegal name (starts with '$'): '%s'", cookie.name)
986 return False
987 return True
988
989 def set_ok_path(self, cookie, request):
990 if cookie.path_specified:
991 req_path = request_path(request)
992 if ((cookie.version > 0 or
993 (cookie.version == 0 and self.strict_ns_set_path)) and
994 not req_path.startswith(cookie.path)):
995 _debug(" path attribute %s is not a prefix of request "
996 "path %s", cookie.path, req_path)
997 return False
998 return True
999
def set_ok_domain(self, cookie, request):
    """Return True if the cookie's domain is acceptable for storing.

    The domain is first screened with is_blocked() and is_not_allowed().
    If the cookie specified its domain explicitly, a series of further
    checks compares that domain with the request host and the effective
    request host returned by eff_request_host(); any failing check rejects
    the cookie.
    """
    if self.is_blocked(cookie.domain):
        _debug(" domain %s is in user block-list", cookie.domain)
        return False
    if self.is_not_allowed(cookie.domain):
        _debug(" domain %s is not in user allow-list", cookie.domain)
        return False
    if not cookie.domain_specified:
        return True

    req_host, erhn = eff_request_host(request)
    domain = cookie.domain

    if self.strict_domain and domain.count(".") >= 2:
        # XXX This should probably be compared with the Konqueror
        # (kcookiejar.cpp) and Mozilla implementations, but it's a
        # losing battle.
        last_dot = domain.rfind(".")
        prev_dot = domain.rfind(".", 0, last_dot)
        if prev_dot == 0:  # domain like .foo.bar
            tld = domain[last_dot + 1:]
            sld = domain[prev_dot + 1:last_dot]
            generic_slds = (
                "co", "ac", "com", "edu", "org", "net",
                "gov", "mil", "int", "aero", "biz", "cat", "coop",
                "info", "jobs", "mobi", "museum", "name", "pro",
                "travel", "eu")
            if sld.lower() in generic_slds and len(tld) == 2:
                # domain like .co.uk
                _debug(" country-code second level domain %s", domain)
                return False

    undotted_domain = domain[1:] if domain.startswith(".") else domain
    if "." not in undotted_domain and domain != ".local":
        _debug(" non-local domain %s contains no embedded dot", domain)
        return False

    version = cookie.version
    if version == 0:
        host_ends_with_domain = (
            erhn.endswith(domain) or
            erhn.startswith(".") or
            ("." + erhn).endswith(domain))
        if not host_ends_with_domain:
            _debug(" effective request-host %s (even with added "
                   "initial dot) does not end with %s",
                   erhn, domain)
            return False

    if version > 0 or (self.strict_ns_domain & self.DomainRFC2965Match):
        if not domain_match(erhn, domain):
            _debug(" effective request-host %s does not domain-match %s",
                   erhn, domain)
            return False

    if version > 0 or (self.strict_ns_domain & self.DomainStrictNoDots):
        # The part of the request host left over once the domain's length
        # has been cut off its end.
        host_prefix = req_host[:-len(domain)]
        if "." in host_prefix and not IPV4_RE.search(req_host):
            _debug(" host prefix %s for domain %s contains a dot",
                   host_prefix, domain)
            return False

    return True
1058
def set_ok_port(self, cookie, request):
    """Return True if the request port appears in the cookie's port list.

    Cookies that did not specify a port are always accepted.  Otherwise the
    comma-separated port list is scanned in order: a non-numeric entry
    met before a match rejects the cookie, and so does reaching the end of
    the list without a match.  A request port of None is treated as "80".
    """
    if not cookie.port_specified:
        return True

    req_port = request_port(request)
    req_port = "80" if req_port is None else str(req_port)

    for candidate in cookie.port.split(","):
        try:
            int(candidate)
        except ValueError:
            _debug(" bad port %s (not numeric)", candidate)
            return False
        if candidate == req_port:
            return True

    _debug(" request port (%s) not found in %s",
           req_port, cookie.port)
    return False
1079
def return_ok(self, cookie, request):
    """Return True if every return_ok_* check accepts the cookie.

    If you override .return_ok(), be sure to call this method.  If it
    returns false, so should your subclass (assuming your subclass wants to
    be more strict about which cookies to return).

    The checks run in a fixed order (version, verifiability, secure,
    expires, port, domain) and stop at the first one that fails.
    """
    # Path has already been checked by .path_return_ok(), and domain
    # blocking done by .domain_return_ok().
    _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

    # Each name is appended to "return_ok_" to find the method to call, so
    # the names must match the method suffixes exactly.
    checks = ("version", "verifiability", "secure", "expires", "port",
              "domain")
    for check in checks:
        fn = getattr(self, "return_ok_" + check)
        if not fn(cookie, request):
            return False
    return True
1097
def return_ok_version(self, cookie, request):
    """Return False if the cookie's protocol is switched off in the policy.

    Cookies with version > 0 need the ``rfc2965`` flag; cookies with
    version 0 need the ``netscape`` flag.
    """
    ver = cookie.version
    if ver > 0:
        if not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
    elif ver == 0 and not self.netscape:
        _debug(" Netscape cookies are switched off")
        return False
    return True
1106
def return_ok_verifiability(self, cookie, request):
    """Return False for third-party cookies the strict flags forbid returning.

    Only requests that are both unverifiable and third-party (as judged by
    is_third_party()) can be refused; the refusal then depends on the
    strict flag matching the cookie's version.
    """
    if not (request.unverifiable and is_third_party(request)):
        return True

    if cookie.version > 0 and self.strict_rfc2965_unverifiable:
        _debug(" third-party RFC 2965 cookie during unverifiable transaction")
        return False
    if cookie.version == 0 and self.strict_ns_unverifiable:
        _debug(" third-party Netscape cookie during unverifiable transaction")
        return False
    return True
1118
def return_ok_secure(self, cookie, request):
    """Return False if a secure cookie would go out on a non-https request."""
    if not cookie.secure:
        return True
    if request.type == "https":
        return True
    _debug(" secure cookie with non-secure request")
    return False
1124
def return_ok_expires(self, cookie, request):
    """Return False if the cookie reports itself expired at ``self._now``."""
    expired = cookie.is_expired(self._now)
    if not expired:
        return True
    _debug(" cookie expired")
    return False
1130
def return_ok_port(self, cookie, request):
    """Return True if the request port is listed in the cookie's port list.

    Cookies with a false ``port`` value are always accepted.  A request
    port of None is treated as "80".
    """
    if not cookie.port:
        return True

    req_port = request_port(request)
    if req_port is None:
        req_port = "80"

    if req_port in cookie.port.split(","):
        return True

    _debug(" request port %s does not match cookie port %s",
           req_port, cookie.port)
    return False
1144
def return_ok_domain(self, cookie, request):
    """Return True if the cookie's domain matches the request host.

    Version > 0 cookies must domain-match the effective request host via
    domain_match().  Version-0 cookies must have a domain that is a
    dot-delimited suffix of the effective request host.  When the
    DomainStrictNonDomain bit of ``strict_ns_domain`` is set, a version-0
    cookie that did not specify its domain must equal the effective
    request host exactly.
    """
    req_host, erhn = eff_request_host(request)
    domain = cookie.domain

    # Compare suffixes against a dot-prefixed domain so that a cookie domain
    # of "example.com" cannot match a request host of "evilexample.com".
    if domain and not domain.startswith("."):
        dotdomain = "." + domain
    else:
        dotdomain = domain

    # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
    if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
        _debug(" cookie with unspecified domain does not string-compare "
               "equal to request domain")
        return False

    if cookie.version > 0 and not domain_match(erhn, domain):
        _debug(" effective request-host name %s does not domain-match "
               "RFC 2965 cookie domain %s", erhn, domain)
        return False
    if cookie.version == 0 and not ("." + erhn).endswith(dotdomain):
        _debug(" request-host %s does not match Netscape cookie domain "
               "%s", req_host, domain)
        return False
    return True
1166
def domain_return_ok(self, domain, request):
    """Return False if no cookie stored under ``domain`` can match the request.

    The domain must be a dot-delimited suffix of the request host or of
    the effective request host, and must pass is_blocked() and
    is_not_allowed().
    """
    # Liberal check of domain.  This is here as an optimization to avoid
    # having to load lots of MSIE cookie files unless necessary.
    req_host, erhn = eff_request_host(request)
    if not req_host.startswith("."):
        req_host = "." + req_host
    if not erhn.startswith("."):
        erhn = "." + erhn

    # Compare against a dot-prefixed domain so that a cookie domain of
    # "example.com" cannot match a request host of "evilexample.com".
    if domain and not domain.startswith("."):
        dotdomain = "." + domain
    else:
        dotdomain = domain
    if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
        return False

    if self.is_blocked(domain):
        _debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        _debug(" domain %s is not in user allow-list", domain)
        return False

    return True
1188
def path_return_ok(self, path, request):
    """Return True if the request path path-matches the cookie ``path``.

    The paths match when they are equal, or when ``path`` is a prefix of
    the request path that either ends in "/" or is immediately followed
    by "/" in the request path.  A bare prefix is not enough: cookie path
    "/foo" must not match request path "/foobar".
    """
    _debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    if req_path == path:
        return True

    pathlen = len(path)
    if (req_path.startswith(path) and
            (path.endswith("/") or req_path[pathlen:pathlen + 1] == "/")):
        return True

    _debug(" %s does not path-match %s", req_path, path)
    return False
1196
1197
def vals_sorted_by_key(adict):
    """Return the values of ``adict``, ordered by their sorted keys."""
    lookup = adict.get
    ordered_keys = sorted(adict.keys())
    return map(lookup, ordered_keys)
1201
def deepvalues(mapping):
    """Yield the leaf values of a nested mapping, depth-first, in key order.

    A value counts as a nested mapping when it has an ``items`` attribute;
    such values are descended into instead of being yielded themselves.
    """
    for entry in vals_sorted_by_key(mapping):
        try:
            entry.items
        except AttributeError:
            is_nested = False
        else:
            is_nested = True

        if is_nested:
            for leaf in deepvalues(entry):
                yield leaf
        else:
            yield entry
1217
1218
# Sentinel passed as the default argument of dict.get(), so that a missing
# key can be told apart from a key that is present with a None value.
class Absent(object):
    pass
1222
class CookieJar(object):
    """Collection of HTTP cookies.

    You may not need to know about this class: try
    urllib.request.build_opener(HTTPCookieProcessor).open(url).

    Cookies are kept in a nested dictionary indexed by domain, then path,
    then name.  Which cookies are stored and returned is decided by the
    policy object.
    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)

    def __init__(self, policy=None):
        """Create an empty jar; a DefaultCookiePolicy is used if none is given."""
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        self._cookies_lock = _threading.RLock()
        self._cookies = {}

    def set_policy(self, policy):
        """Replace the policy object used by this jar."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return the cookies stored under ``domain`` that the policy allows."""
        if not self._policy.domain_return_ok(domain, request):
            return []
        _debug("Checking %s for cookies to return", domain)
        cookies = []
        for path, cookies_by_name in self._cookies[domain].items():
            if not self._policy.path_return_ok(path, request):
                continue
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    _debug(" not returning cookie")
                    continue
                _debug(" it's a match")
                cookies.append(cookie)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        Note that ``cookies`` is sorted in place.

        """
        # add cookies in order of most specific (ie. longest) path first
        cookies.sort(key=lambda a: len(a.path), reverse=True)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            #  different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            #  RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                    self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                            domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib.request.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        """
        _debug("add_cookie_header")
        with self._cookies_lock:
            self._policy._now = self._now = int(time.time())

            cookies = self._cookies_for_request(request)

            attrs = self._cookie_attrs(cookies)
            if attrs:
                if not request.has_header("Cookie"):
                    request.add_unredirected_header(
                        "Cookie", "; ".join(attrs))

            # if necessary, advertise that we know RFC 2965
            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
                    not request.has_header("Cookie2")):
                for cookie in cookies:
                    if cookie.version != 1:
                        request.add_unredirected_header(
                            "Cookie2", '$Version="1"')
                        break

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        _debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        _debug(" missing or invalid value for expires "
                               "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except ValueError:
                        _debug(" missing or invalid (non-numeric) value for "
                               "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age
                    #   is a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                            k not in ("port", "comment", "commenturl")):
                        _debug(" missing value for %s attribute" % k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, filling in defaults.

        Returns None when the version is not an integer, or when the expiry
        time is not in the future; in the latter case any stored cookie with
        the same domain, path and name is removed.
        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None:
            try:
                version = int(version)
            except ValueError:
                return None  # invalid version, ignore cookie
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i + 1]
            if len(path) == 0:
                path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "." + domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present.  Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie.  This can't be
            # in DefaultCookiePolicy, because can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                   domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Return the Cookie objects that can be built from ``attrs_set``."""
        cookie_tuples = self._normalized_cookie_tuples(attrs_set)

        cookies = []
        for tup in cookie_tuples:
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie:
                cookies.append(cookie)
        return cookies

    def _process_rfc2109_cookies(self, cookies):
        """Flag version-1 cookies as RFC 2109, optionally downgrading them.

        The downgrade to version 0 follows the policy's
        ``rfc2109_as_netscape`` attribute; when that is missing or None, it
        happens exactly when the policy's ``rfc2965`` flag is false.
        """
        rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
        if rfc2109_as_ns is None:
            rfc2109_as_ns = not self._policy.rfc2965
        for cookie in cookies:
            if cookie.version == 1:
                cookie.rfc2109 = True
                if rfc2109_as_ns:
                    # treat 2109 cookies as Netscape cookies rather than
                    # as RFC2965 cookies
                    cookie.version = 0

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
        ns_hdrs = headers.get_all("Set-Cookie", [])

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
                (not ns_hdrs and not rfc2965) or
                (not rfc2965_hdrs and not netscape) or
                (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except Exception:
                _warn_unhandled_exception()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                seen = set((c.domain, c.path, c.name) for c in cookies)
                # Build a real list (not a lazy filter object) so that the
                # emptiness test below means what it says.
                ns_cookies = [
                    c for c in ns_cookies
                    if (c.domain, c.path, c.name) not in seen]

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so."""
        with self._cookies_lock:
            self._policy._now = self._now = int(time.time())

            if self._policy.set_ok(cookie, request):
                self.set_cookie(cookie)

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set."""
        with self._cookies_lock:
            # Look the store up under the lock, in case another thread
            # rebinds self._cookies (as clear() does) in the meantime.
            by_path = self._cookies.setdefault(cookie.domain, {})
            by_name = by_path.setdefault(cookie.path, {})
            by_name[cookie.name] = cookie

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request."""
        _debug("extract_cookies: %s", response.info())
        with self._cookies_lock:
            self._policy._now = self._now = int(time.time())

            for cookie in self.make_cookies(response, request):
                if self._policy.set_ok(cookie, request):
                    _debug(" setting cookie: %s", cookie)
                    self.set_cookie(cookie)

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Note that the .save() method won't save session cookies anyway, unless
        you ask otherwise by passing a true ignore_discard argument.

        """
        with self._cookies_lock:
            for cookie in self:
                if cookie.discard:
                    self.clear(cookie.domain, cookie.path, cookie.name)

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the
        .save() method won't save expired cookies anyway (unless you ask
        otherwise by passing a true ignore_expires argument).

        """
        with self._cookies_lock:
            now = time.time()
            for cookie in self:
                if cookie.is_expired(now):
                    self.clear(cookie.domain, cookie.path, cookie.name)

    def __iter__(self):
        """Iterate over the stored cookies in sorted domain/path/name order."""
        return deepvalues(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        return sum(1 for _ in self)

    @as_native_str()
    def __repr__(self):
        r = [repr(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = [str(cookie) for cookie in self]
        return "<%s[%s]>" % (self.__class__, ", ".join(r))
1750
1751
# Subclasses IOError for backwards-compatibility with Python 2.4.0.
class LoadError(IOError):
    pass
1754
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    Subclasses supply the file format by implementing save() and
    _really_load().
    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        Raises ValueError if filename is given but is not string-like.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            # Duck-type check: anything that can be concatenated with a
            # string is accepted as a filename.
            try:
                filename + ""
            except Exception:
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Falls back to self.filename when no filename is passed; raises
        ValueError if neither is available.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        with open(filename) as f:
            self._really_load(f, filename, ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        with self._cookies_lock:
            # Keep a copy so the jar can be put back if loading fails.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise
1814
1815
def lwp_cookie_str(cookie):
    """Return the Cookie serialised as one LWP cookie-file header value.

    The result is built by join_header_words() from the cookie's
    name/value pair, its path and domain, whichever optional attributes
    are set, the extra attributes in ``cookie._rest`` (sorted by key), and
    finally the version.
    """
    pairs = [
        (cookie.name, cookie.value),
        ("path", cookie.path),
        ("domain", cookie.domain),
    ]
    if cookie.port is not None:
        pairs.append(("port", cookie.port))
    if cookie.path_specified:
        pairs.append(("path_spec", None))
    if cookie.port_specified:
        pairs.append(("port_spec", None))
    if cookie.domain_initial_dot:
        pairs.append(("domain_dot", None))
    if cookie.secure:
        pairs.append(("secure", None))
    if cookie.expires:
        expiry_text = time2isoz(float(cookie.expires))
        pairs.append(("expires", expiry_text))
    if cookie.discard:
        pairs.append(("discard", None))
    if cookie.comment:
        pairs.append(("comment", cookie.comment))
    if cookie.comment_url:
        pairs.append(("commenturl", cookie.comment_url))

    extras = cookie._rest
    for key in sorted(extras.keys()):
        pairs.append((key, str(extras[key])))

    pairs.append(("version", str(cookie.version)))

    return join_header_words([pairs])
1843
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

        ignore_discard and ignore_expires: see docstring for FileCookieJar.save

        """
        now = time.time()
        r = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        # The trailing empty element makes the output end with a newline.
        return "\n".join(r + [""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to ``filename`` (or self.filename) in LWP format.

        Raises ValueError if no filename is available.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        with open(filename, "w") as f:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read "Set-Cookie3" lines from the open file ``f`` into the jar.

        Raises LoadError if the first line is not the LWP magic header or if
        a line cannot be parsed; IOError is passed through unchanged.
        """
        magic = f.readline()
        if not self.magic_re.search(magic):
            msg = ("%r does not look like a Set-Cookie3 (LWP) format "
                   "file" % filename)
            raise LoadError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        # Bound up front so the error handler below can always report it,
        # even if the very first readline() raises.
        line = ""
        try:
            while 1:
                line = f.readline()
                if line == "":
                    break
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    standard = {}
                    rest = {}
                    for k in boolean_attrs:
                        standard[k] = False
                    for k, v in data[1:]:
                        if k is not None:
                            lc = k.lower()
                        else:
                            lc = None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            if v is None:
                                v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    if expires is None:
                        discard = True
                    domain = h("domain")
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)

        except IOError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Set-Cookie3 format file %r: %r" %
                            (filename, line))
1966
1967
class MozillaCookieJar(FileCookieJar):
    """FileCookieJar that loads and saves Mozilla/Netscape `cookies.txt' files.

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies. I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file. This class uses the Mozilla/Netscape
    `cookies.txt' format. lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers. The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # The first line of a file must match this pattern to be accepted.
    magic_re = re.compile("#( Netscape)? HTTP Cookie File")
    # Written verbatim at the top of every file produced by save().
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read cookies from the open file object *f* into this jar.

        The first line must match ``magic_re``.  Every following line that
        is not blank and does not start with "#" or "$" must consist of
        seven tab-separated fields: domain, domain_specified, path, secure,
        expires, name, value.

        *filename* is only used in error messages.  Cookies flagged as
        discard are skipped unless *ignore_discard* is true; expired
        cookies are skipped unless *ignore_expires* is true.

        Raises LoadError if the magic line is missing or a line cannot be
        parsed.  IOError is propagated unchanged.
        """
        now = time.time()

        magic = f.readline()
        if not self.magic_re.search(magic):
            f.close()
            raise LoadError(
                "%r does not look like a Netscape format cookies file" %
                filename)

        # Bind `line` before entering the try block so that the error
        # handler below can always format it, even when the very first
        # readline() call is the one that fails.
        line = ""
        try:
            while True:
                line = f.readline()
                if line == "":
                    break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"):
                    line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if stripped == "" or stripped.startswith(("#", "$")):
                    continue

                # A wrong number of fields raises ValueError here, which the
                # handler below turns into LoadError.
                domain, domain_specified, path, secure, expires, name, value = \
                    line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # Explicit check instead of `assert`, so that inconsistent
                # files are still rejected when Python runs with -O.
                if domain_specified != initial_dot:
                    raise ValueError(
                        "domain_specified flag does not match the leading "
                        "dot of domain %r" % domain)

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except IOError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Netscape format cookies file %r: %r" %
                            (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies in this jar to a Netscape format cookies file.

        *filename* defaults to ``self.filename``; ValueError is raised when
        both are None.  Cookies flagged as discard are skipped unless
        *ignore_discard* is true; expired cookies are skipped unless
        *ignore_expires* is true.  The target file is opened in "w" mode,
        so existing content is replaced.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(MISSING_FILENAME_TEXT)

        # The context manager closes the file on every exit path.
        with open(filename, "w") as f:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = "TRUE" if cookie.secure else "FALSE"
                initial_dot = "TRUE" if cookie.domain.startswith(".") else "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) +
                    "\n")
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698