Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: Tools/Scripts/webkitpy/thirdparty/mechanize/_clientcookie.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 """HTTP cookie handling for web clients.
2
3 This module originally developed from my port of Gisle Aas' Perl module
4 HTTP::Cookies, from the libwww-perl library.
5
6 Docstrings, comments and debug strings in this code refer to the
7 attributes of the HTTP cookie system as cookie-attributes, to distinguish
8 them clearly from Python attributes.
9
10                         CookieJar____
11                         /     \      \
12             FileCookieJar      \      \
13              /    |   \         \      \
14  MozillaCookieJar | LWPCookieJar \      \
15                   |               |      \
16                   |   ---MSIEBase |       \
17                   |  /      |     |        \
18                   | /   MSIEDBCookieJar BSDDBCookieJar
19                   |/
20                MSIECookieJar
21
22 Comments to John J Lee <jjl@pobox.com>.
23
24
25 Copyright 2002-2006 John J Lee <jjl@pobox.com>
26 Copyright 1997-1999 Gisle Aas (original libwww-perl code)
27 Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
28
29 This code is free software; you can redistribute it and/or modify it
30 under the terms of the BSD or ZPL 2.1 licenses (see the file
31 COPYING.txt included with the distribution).
32
33 """
34
35 import sys, re, copy, time, urllib, types, logging
36 try:
37 import threading
38 _threading = threading; del threading
39 except ImportError:
40 import dummy_threading
41 _threading = dummy_threading; del dummy_threading
42
# Explanatory text for a missing cookie-file name (its users are not in this
# part of the file).
MISSING_FILENAME_TEXT = (
    "a filename was not supplied (nor was the CookieJar "
    "instance initialised with one)"
)
# Port reported by request_port() when the request host names no port.
DEFAULT_HTTP_PORT = "80"
46
47 from _headersutil import split_header_words, parse_ns_headers
48 from _util import isstringlike
49 import _rfc3986
50
# Module-wide debug hook: diagnostics in this module are sent to the
# "mechanize.cookies" logger at DEBUG level.
_cookie_log = logging.getLogger("mechanize.cookies")
debug = _cookie_log.debug
52
53
def reraise_unmasked_exceptions(unmasked=()):
    """Re-raise the exception being handled unless it may be swallowed.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  This function is meant
    to be called from inside such a clause (the bare raise statements below
    re-raise whatever exception is currently being handled).

    unmasked: tuple of extra exception classes that must always propagate,
     in addition to KeyboardInterrupt, SystemExit and MemoryError

    If mechanize.USE_BARE_EXCEPT is false, every exception propagates.
    Otherwise an exception outside the unmasked set is swallowed and
    reported through warnings.warn, with its traceback in the message.

    """
    import mechanize
    import warnings
    if not mechanize.USE_BARE_EXCEPT:
        raise
    always_raise = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    current_type = sys.exc_info()[0]
    if issubclass(current_type, always_raise):
        raise
    # swallowed an exception: turn its traceback into a warning instead
    import traceback
    report = traceback.format_exc()
    warnings.warn("mechanize bug!\n%s" % report, stacklevel=2)
71
72
# Matches text that ends in a dot followed by digits only -- the cheap test
# this module uses for "looks like an IPv4 address".
IPV4_RE = re.compile(r"\.\d+$")


def is_HDN(text):
    """Return True if text is a host domain name.

    Text is rejected if it is empty, ends in a dot-plus-digits component
    (see IPV4_RE), or begins or ends with a dot.

    """
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    return not (text.startswith(".") or text.endswith("."))
84
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    "Has the form NB" is checked as "B is a proper suffix of A"; a B that
    merely occurs somewhere inside A (x.y.com.evil.org against .y.com) is
    not a match.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    # B must have the form .B'
    if not B.startswith("."):
        return False
    # A must have the form NB.  A != B at this point, so a matching suffix
    # guarantees that N is non-empty.
    if not A.endswith(B):
        return False
    return is_HDN(A) and is_HDN(B[1:])
121
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.  The only thing rejected is text that
    IPV4_RE recognises as ending in a dot followed by digits.

    """
    looks_like_ip = IPV4_RE.search(text) is not None
    return not looks_like_ip
129
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  Comparison is
    case-insensitive.

    If either argument fails liberal_is_HDN (i.e. looks like an IP
    address), only an exact match counts.  Otherwise a B starting with a
    dot matches any A that ends with B, and a B without an initial dot
    must equal A.

    """
    host = A.lower()
    pattern = B.lower()
    if not liberal_is_HDN(host) or not liberal_is_HDN(pattern):
        # IP addresses must be equal
        return host == pattern
    if pattern.startswith("."):
        return host.endswith(pattern)
    return host == pattern
149
# Matches a trailing ":<digits>" port suffix on a host string.
cut_port_re = re.compile(r":\d+$")


def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is element 1 of _rfc3986.urlsplit() applied to the request's
    full URL (presumably the authority component -- confirm against
    _rfc3986).  If that is None, the request's "Host" header is used
    instead (default "").  A trailing ":port" is removed.

    No case conversion happens here; request_host_lc returns the
    lowercased form.

    """
    full_url = request.get_full_url()
    authority = _rfc3986.urlsplit(full_url)[1]
    if authority is None:
        authority = request.get_header("Host", "")
    # remove port, if present
    return cut_port_re.sub("", authority, 1)
164
def request_host_lc(request):
    """Return request_host(request) converted to lower case."""
    host = request_host(request)
    return host.lower()
167
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    The effective name is the request-host with ".local" appended when the
    request-host contains no dot and is not matched by IPV4_RE; otherwise
    it is the request-host itself.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
174
def eff_request_host_lc(request):
    """Return eff_request_host(request) with both elements lowercased."""
    req_host, erhn = eff_request_host(request)
    lowered = (req_host.lower(), erhn.lower())
    return lowered
178
def effective_request_host(request):
    """Return the effective request-host, as defined by RFC 2965.

    This is the second element of the pair built by eff_request_host.

    """
    _req_host, erhn = eff_request_host(request)
    return erhn
182
def request_path(request):
    """Return path component of request-URI, as defined by RFC 2965.

    The path is element 2 of _rfc3986.urlsplit() applied to the request's
    full URL, passed through escape_path, and given a leading "/" if it
    does not already have one.

    """
    raw_path = _rfc3986.urlsplit(request.get_full_url())[2]
    escaped = escape_path(raw_path)
    if escaped.startswith("/"):
        return escaped
    return "/" + escaped
190
def request_port(request):
    """Return the port of the request's host, as a string.

    The port is whatever follows the first ':' in request.get_host().
    With no ':' present, DEFAULT_HTTP_PORT is returned.  If the text after
    the ':' is not an integer, this is logged and None is returned.

    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon+1:]
    try:
        int(port)
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
204
def request_is_unverifiable(request):
    """Return the request's "unverifiable" flag.

    request.is_unverifiable() is tried first.  If that raises
    AttributeError, the plain attribute request.unverifiable is returned
    when it exists; otherwise the AttributeError propagates.

    """
    try:
        verdict = request.is_unverifiable()
    except AttributeError:
        if not hasattr(request, "unverifiable"):
            raise
        verdict = request.unverifiable
    return verdict
213
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# Matches one %XX escape and captures its two hex digits.
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")


def uppercase_escaped_char(match):
    """Return the escape matched by ESCAPED_CHAR_RE with upper-case hex.

    match: match object whose group 1 holds the two hex digits

    """
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes.

    path: byte string or unicode string; unicode is encoded as UTF-8
     before quoting

    Characters listed in HTTP_PATH_SAFE are left unquoted.

    """
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, types.UnicodeType):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
235
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    first_dot = h.find(".")
    if first_dot < 0:
        # not of the form A.B
        return h
    # A would be h[:first_dot]; only B is needed below
    b = h[first_dot+1:]
    b_qualifies = "." in b or b == "local"
    if b_qualifies and is_HDN(h):
        return "." + b
    return h
270
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    request: must provide origin_req_host in addition to whatever
     request_host_lc needs

    """
    req_host = request_host_lc(request)
    # the origin request's request-host was stuffed into request by
    # _urllib2_support.AbstractHTTPHandler
    origin_reach = reach(request.origin_req_host)
    return not domain_match(req_host, origin_reach)
285
286
try:
    all
except NameError:
    # python 2.4 has no builtin all(); supply an equivalent
    def all(iterable):
        """Return False at the first false element, True if there is none."""
        for element in iterable:
            if element:
                continue
            return False
        return True
296
297
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    version: integer;
    name: string;
    value: string (may be None);
    port: string; None indicates no attribute was supplied (e.g. "Port", rather
     than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
     string (e.g. "80,8080")
    port_specified: boolean; true if a value was supplied with the Port
     cookie-attribute
    domain: string;
    domain_specified: boolean; true if Domain was explicitly set
    domain_initial_dot: boolean; true if Domain as set in HTTP header by server
     started with a dot (yes, this really is necessary!)
    path: string;
    path_specified: boolean; true if Path was explicitly set
    secure: boolean; true if should only be returned over secure connection
    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
     this value from the Max-Age attribute)
    discard: boolean, true if this is a session cookie; (if no expires value,
     this should be true)
    comment: string;
    comment_url: string;
    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
     Set-Cookie2:) header, but had a version cookie-attribute of 1
    rest: mapping of other cookie-attributes

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    # Attributes compared by __eq__.
    _attrs = ("version", "name", "value",
              "port", "port_specified",
              "domain", "domain_specified", "domain_initial_dot",
              "path", "path_specified",
              "secure", "expires", "discard", "comment", "comment_url",
              "rfc2109", "_rest")

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie attributes described in the class docstring.

        version and expires are converted with int() unless they are None;
        domain is lowercased; rest is shallow-copied.

        Raises ValueError if port is None while port_specified is True.

        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # private copy, so later set_nonstandard_attr calls do not modify
        # the caller's mapping
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if the non-standard cookie-attribute name is stored."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard cookie-attribute name, or default."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Store value as the non-standard cookie-attribute name."""
        self._rest[name] = value

    def nonstandard_attr_keys(self):
        """Return the names of the stored non-standard cookie-attributes."""
        return self._rest.keys()

    def is_expired(self, now=None):
        """Return true if the cookie has an expiry time that is <= now.

        now: seconds since epoch; None means the current time

        A cookie whose expires is None never counts as expired.

        """
        if now is None:
            now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __eq__(self, other):
        """Compare every attribute named in _attrs.

        Returns NotImplemented (instead of raising AttributeError) when
        other lacks one of those attributes, so that comparing a cookie
        with an unrelated object is simply unequal.

        """
        missing = object()
        for attr in self._attrs:
            theirs = getattr(other, attr, missing)
            if theirs is missing:
                return NotImplemented
            if not (getattr(self, attr) == theirs):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __str__(self):
        if self.port is None:
            p = ""
        else:
            p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        # Laid out as a constructor call: the private _rest mapping is
        # shown under the constructor's parameter name, rest.
        args = []
        for name in ["version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ]:
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
433
434
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    As well as implementing set_ok and return_ok, implementations of this
    interface must also supply the following attributes, indicating which
    protocols should be used, and how.  These can be read and set at any time,
    though whether that makes complete sense from the protocol point of view is
    doubtful.

    Public attributes:

    netscape: implement netscape protocol
    rfc2965: implement RFC 2965 protocol
    rfc2109_as_netscape:
       WARNING: This argument will change or go away if is not accepted into
                the Python standard library in this form!
     If true, treat RFC 2109 cookies as though they were Netscape cookies.  The
     default is for this attribute to be None, which means treat 2109 cookies
     as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
     by default), and as Netscape cookies otherwise.
    hide_cookie2: don't add Cookie2 header to requests (the presence of
     this header indicates to the server that we understand RFC 2965
     cookies)

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.extract_cookies.__doc__

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.add_cookie_header.__doc__

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This is here as an optimization, to remove the need for checking every
        cookie with a particular domain (which may involve reading many files).
        The default implementations of domain_return_ok and path_return_ok
        (return True) leave all the work to return_ok.

        If domain_return_ok returns true for the cookie domain, path_return_ok
        is called for the cookie path.  Otherwise, path_return_ok and return_ok
        are never called for that cookie domain.  If path_return_ok returns
        true, return_ok is called with the Cookie object itself for a full
        check.  Otherwise, return_ok is never called for that cookie path.

        Note that domain_return_ok is called for every *cookie* domain, not
        just for the *request* domain.  For example, the function might be
        called with both ".acme.com" and "www.acme.com" if the request domain
        is "www.acme.com".  The same goes for path_return_ok.

        For argument documentation, see the docstring for return_ok.

        """
        # no filtering at this level
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        See the docstring for domain_return_ok.

        """
        # no filtering at this level
        return True
519
520
521 class DefaultCookiePolicy(CookiePolicy):
522 """Implements the standard rules for accepting and returning cookies.
523
524 Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
525 switched off by default.
526
527 The easiest way to provide your own policy is to override this class and
528 call its methods in your overriden implementations before adding your own
529 additional checks.
530
531 import mechanize
532 class MyCookiePolicy(mechanize.DefaultCookiePolicy):
533 def set_ok(self, cookie, request):
534 if not mechanize.DefaultCookiePolicy.set_ok(
535 self, cookie, request):
536 return False
537 if i_dont_want_to_store_this_cookie():
538 return False
539 return True
540
541 In addition to the features required to implement the CookiePolicy
542 interface, this class allows you to block and allow domains from setting
543 and receiving cookies. There are also some strictness switches that allow
544 you to tighten up the rather loose Netscape protocol rules a little bit (at
545 the cost of blocking some benign cookies).
546
547 A domain blacklist and whitelist is provided (both off by default). Only
548 domains not in the blacklist and present in the whitelist (if the whitelist
549 is active) participate in cookie setting and returning. Use the
550 blocked_domains constructor argument, and blocked_domains and
551 set_blocked_domains methods (and the corresponding argument and methods for
552 allowed_domains). If you set a whitelist, you can turn it off again by
553 setting it to None.
554
555 Domains in block or allow lists that do not start with a dot must
556 string-compare equal. For example, "acme.com" matches a blacklist entry of
557 "acme.com", but "www.acme.com" does not. Domains that do start with a dot
558 are matched by more specific domains too. For example, both "www.acme.com"
559 and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
560 not). IP addresses are an exception, and must match exactly. For example,
561 if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is
562 blocked, but 193.168.1.2 is not.
563
564 Additional Public Attributes:
565
566 General strictness switches
567
568 strict_domain: don't allow sites to set two-component domains with
569 country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc.
570 This is far from perfect and isn't guaranteed to work!
571
572 RFC 2965 protocol strictness switches
573
574 strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
575 transactions (usually, an unverifiable transaction is one resulting from
576 a redirect or an image hosted on another site); if this is false, cookies
577 are NEVER blocked on the basis of verifiability
578
579 Netscape protocol strictness switches
580
581 strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
582 even to Netscape cookies
583 strict_ns_domain: flags indicating how strict to be with domain-matching
584 rules for Netscape cookies:
585 DomainStrictNoDots: when setting cookies, host prefix must not contain a
586 dot (e.g. www.foo.bar.com can't set a cookie for .bar.com, because
587 www.foo contains a dot)
588 DomainStrictNonDomain: cookies that did not explicitly specify a Domain
589 cookie-attribute can only be returned to a domain that string-compares
590 equal to the domain that set the cookie (e.g. rockets.acme.com won't
591 be returned cookies from acme.com that had no Domain cookie-attribute)
592 DomainRFC2965Match: when setting cookies, require a full RFC 2965
593 domain-match
594 DomainLiberal and DomainStrict are the most useful combinations of the
595 above flags, for convenience
596 strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
597 have names starting with '$'
598 strict_ns_set_path: don't allow setting cookies whose path doesn't
599 path-match request URI
600
601 """
602
603 DomainStrictNoDots = 1
604 DomainStrictNonDomain = 2
605 DomainRFC2965Match = 4
606
607 DomainLiberal = 0
608 DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
609
610 def __init__(self,
611 blocked_domains=None, allowed_domains=None,
612 netscape=True, rfc2965=False,
613 # WARNING: this argument will change or go away if is not
614 # accepted into the Python standard library in this form!
615 # default, ie. treat 2109 as netscape iff not rfc2965
616 rfc2109_as_netscape=None,
617 hide_cookie2=False,
618 strict_domain=False,
619 strict_rfc2965_unverifiable=True,
620 strict_ns_unverifiable=False,
621 strict_ns_domain=DomainLiberal,
622 strict_ns_set_initial_dollar=False,
623 strict_ns_set_path=False,
624 ):
625 """
626 Constructor arguments should be used as keyword arguments only.
627
628 blocked_domains: sequence of domain names that we never accept cookies
629 from, nor return cookies to
630 allowed_domains: if not None, this is a sequence of the only domains
631 for which we accept and return cookies
632
633 For other arguments, see CookiePolicy.__doc__ and
634 DefaultCookiePolicy.__doc__..
635
636 """
637 self.netscape = netscape
638 self.rfc2965 = rfc2965
639 self.rfc2109_as_netscape = rfc2109_as_netscape
640 self.hide_cookie2 = hide_cookie2
641 self.strict_domain = strict_domain
642 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
643 self.strict_ns_unverifiable = strict_ns_unverifiable
644 self.strict_ns_domain = strict_ns_domain
645 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
646 self.strict_ns_set_path = strict_ns_set_path
647
648 if blocked_domains is not None:
649 self._blocked_domains = tuple(blocked_domains)
650 else:
651 self._blocked_domains = ()
652
653 if allowed_domains is not None:
654 allowed_domains = tuple(allowed_domains)
655 self._allowed_domains = allowed_domains
656
657 def blocked_domains(self):
658 """Return the sequence of blocked domains (as a tuple)."""
659 return self._blocked_domains
660 def set_blocked_domains(self, blocked_domains):
661 """Set the sequence of blocked domains."""
662 self._blocked_domains = tuple(blocked_domains)
663
664 def is_blocked(self, domain):
665 for blocked_domain in self._blocked_domains:
666 if user_domain_match(domain, blocked_domain):
667 return True
668 return False
669
670 def allowed_domains(self):
671 """Return None, or the sequence of allowed domains (as a tuple)."""
672 return self._allowed_domains
673 def set_allowed_domains(self, allowed_domains):
674 """Set the sequence of allowed domains, or None."""
675 if allowed_domains is not None:
676 allowed_domains = tuple(allowed_domains)
677 self._allowed_domains = allowed_domains
678
679 def is_not_allowed(self, domain):
680 if self._allowed_domains is None:
681 return False
682 for allowed_domain in self._allowed_domains:
683 if user_domain_match(domain, allowed_domain):
684 return False
685 return True
686
687 def set_ok(self, cookie, request):
688 """
689 If you override set_ok, be sure to call this method. If it returns
690 false, so should your subclass (assuming your subclass wants to be more
691 strict about which cookies to accept).
692
693 """
694 debug(" - checking cookie %s", cookie)
695
696 assert cookie.name is not None
697
698 for n in "version", "verifiability", "name", "path", "domain", "port":
699 fn_name = "set_ok_"+n
700 fn = getattr(self, fn_name)
701 if not fn(cookie, request):
702 return False
703
704 return True
705
706 def set_ok_version(self, cookie, request):
707 if cookie.version is None:
708 # Version is always set to 0 by parse_ns_headers if it's a Netscape
709 # cookie, so this must be an invalid RFC 2965 cookie.
710 debug(" Set-Cookie2 without version attribute (%s)", cookie)
711 return False
712 if cookie.version > 0 and not self.rfc2965:
713 debug(" RFC 2965 cookies are switched off")
714 return False
715 elif cookie.version == 0 and not self.netscape:
716 debug(" Netscape cookies are switched off")
717 return False
718 return True
719
720 def set_ok_verifiability(self, cookie, request):
721 if request_is_unverifiable(request) and is_third_party(request):
722 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
723 debug(" third-party RFC 2965 cookie during "
724 "unverifiable transaction")
725 return False
726 elif cookie.version == 0 and self.strict_ns_unverifiable:
727 debug(" third-party Netscape cookie during "
728 "unverifiable transaction")
729 return False
730 return True
731
732 def set_ok_name(self, cookie, request):
733 # Try and stop servers setting V0 cookies designed to hack other
734 # servers that know both V0 and V1 protocols.
735 if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
736 cookie.name.startswith("$")):
737 debug(" illegal name (starts with '$'): '%s'", cookie.name)
738 return False
739 return True
740
741 def set_ok_path(self, cookie, request):
742 if cookie.path_specified:
743 req_path = request_path(request)
744 if ((cookie.version > 0 or
745 (cookie.version == 0 and self.strict_ns_set_path)) and
746 not req_path.startswith(cookie.path)):
747 debug(" path attribute %s is not a prefix of request "
748 "path %s", cookie.path, req_path)
749 return False
750 return True
751
752 def set_ok_countrycode_domain(self, cookie, request):
753 """Return False if explicit cookie domain is not acceptable.
754
755 Called by set_ok_domain, for convenience of overriding by
756 subclasses.
757
758 """
759 if cookie.domain_specified and self.strict_domain:
760 domain = cookie.domain
761 # since domain was specified, we know that:
762 assert domain.startswith(".")
763 if domain.count(".") == 2:
764 # domain like .foo.bar
765 i = domain.rfind(".")
766 tld = domain[i+1:]
767 sld = domain[1:i]
768 if (sld.lower() in [
769 "co", "ac",
770 "com", "edu", "org", "net", "gov", "mil", "int",
771 "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
772 "museum", "name", "pro", "travel",
773 ] and
774 len(tld) == 2):
775 # domain like .co.uk
776 return False
777 return True
778
779 def set_ok_domain(self, cookie, request):
780 if self.is_blocked(cookie.domain):
781 debug(" domain %s is in user block-list", cookie.domain)
782 return False
783 if self.is_not_allowed(cookie.domain):
784 debug(" domain %s is not in user allow-list", cookie.domain)
785 return False
786 if not self.set_ok_countrycode_domain(cookie, request):
787 debug(" country-code second level domain %s", cookie.domain)
788 return False
789 if cookie.domain_specified:
790 req_host, erhn = eff_request_host_lc(request)
791 domain = cookie.domain
792 if domain.startswith("."):
793 undotted_domain = domain[1:]
794 else:
795 undotted_domain = domain
796 embedded_dots = (undotted_domain.find(".") >= 0)
797 if not embedded_dots and domain != ".local":
798 debug(" non-local domain %s contains no embedded dot",
799 domain)
800 return False
801 if cookie.version == 0:
802 if (not erhn.endswith(domain) and
803 (not erhn.startswith(".") and
804 not ("."+erhn).endswith(domain))):
805 debug(" effective request-host %s (even with added "
806 "initial dot) does not end end with %s",
807 erhn, domain)
808 return False
809 if (cookie.version > 0 or
810 (self.strict_ns_domain & self.DomainRFC2965Match)):
811 if not domain_match(erhn, domain):
812 debug(" effective request-host %s does not domain-match "
813 "%s", erhn, domain)
814 return False
815 if (cookie.version > 0 or
816 (self.strict_ns_domain & self.DomainStrictNoDots)):
817 host_prefix = req_host[:-len(domain)]
818 if (host_prefix.find(".") >= 0 and
819 not IPV4_RE.search(req_host)):
820 debug(" host prefix %s for domain %s contains a dot",
821 host_prefix, domain)
822 return False
823 return True
824
825 def set_ok_port(self, cookie, request):
826 if cookie.port_specified:
827 req_port = request_port(request)
828 if req_port is None:
829 req_port = "80"
830 else:
831 req_port = str(req_port)
832 for p in cookie.port.split(","):
833 try:
834 int(p)
835 except ValueError:
836 debug(" bad port %s (not numeric)", p)
837 return False
838 if p == req_port:
839 break
840 else:
841 debug(" request port (%s) not found in %s",
842 req_port, cookie.port)
843 return False
844 return True
845
846 def return_ok(self, cookie, request):
847 """
848 If you override return_ok, be sure to call this method. If it returns
849 false, so should your subclass (assuming your subclass wants to be more
850 strict about which cookies to return).
851
852 """
853 # Path has already been checked by path_return_ok, and domain blocking
854 # done by domain_return_ok.
855 debug(" - checking cookie %s", cookie)
856
857 for n in ("version", "verifiability", "secure", "expires", "port",
858 "domain"):
859 fn_name = "return_ok_"+n
860 fn = getattr(self, fn_name)
861 if not fn(cookie, request):
862 return False
863 return True
864
865 def return_ok_version(self, cookie, request):
866 if cookie.version > 0 and not self.rfc2965:
867 debug(" RFC 2965 cookies are switched off")
868 return False
869 elif cookie.version == 0 and not self.netscape:
870 debug(" Netscape cookies are switched off")
871 return False
872 return True
873
874 def return_ok_verifiability(self, cookie, request):
875 if request_is_unverifiable(request) and is_third_party(request):
876 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
877 debug(" third-party RFC 2965 cookie during unverifiable "
878 "transaction")
879 return False
880 elif cookie.version == 0 and self.strict_ns_unverifiable:
881 debug(" third-party Netscape cookie during unverifiable "
882 "transaction")
883 return False
884 return True
885
886 def return_ok_secure(self, cookie, request):
887 if cookie.secure and request.get_type() != "https":
888 debug(" secure cookie with non-secure request")
889 return False
890 return True
891
892 def return_ok_expires(self, cookie, request):
893 if cookie.is_expired(self._now):
894 debug(" cookie expired")
895 return False
896 return True
897
898 def return_ok_port(self, cookie, request):
899 if cookie.port:
900 req_port = request_port(request)
901 if req_port is None:
902 req_port = "80"
903 for p in cookie.port.split(","):
904 if p == req_port:
905 break
906 else:
907 debug(" request port %s does not match cookie port %s",
908 req_port, cookie.port)
909 return False
910 return True
911
def return_ok_domain(self, cookie, request):
    """Return False if the cookie's domain does not match the request host.

    Version 0 (Netscape) cookies are matched by string suffix against the
    dot-prefixed effective request-host name; cookies with version > 0 are
    matched with domain_match().

    """
    req_host, erhn = eff_request_host_lc(request)
    domain = cookie.domain
    version = cookie.version

    if version == 0:
        # strict check of non-domain cookies: Mozilla does this, MSIE5
        # doesn't
        strict_non_domain = (self.strict_ns_domain &
                             self.DomainStrictNonDomain)
        if (strict_non_domain and
            not cookie.domain_specified and domain != erhn):
            debug(" cookie with unspecified domain does not string-compare "
                  "equal to request domain")
            return False
        if not ("."+erhn).endswith(domain):
            debug(" request-host %s does not match Netscape cookie domain "
                  "%s", req_host, domain)
            return False
    elif version > 0:
        if not domain_match(erhn, domain):
            debug(" effective request-host name %s does not domain-match "
                  "RFC 2965 cookie domain %s", erhn, domain)
            return False
    return True
933
def domain_return_ok(self, domain, request):
    """Return False if no cookie stored under domain may go to request.

    domain is the key under which cookies are stored (a cookie domain);
    request is the outgoing request.  The check is deliberately cheap: it
    suffix-matches the request host against the cookie domain, then
    consults the user block-list and allow-list.

    """
    # Liberal check of domain. This is here as an optimization to avoid
    # having to load lots of MSIE cookie files unless necessary.

    # Munge req_host and erhn to always start with a dot, so as to err on
    # the side of letting cookies through.
    dotted_req_host, dotted_erhn = eff_request_host_lc(request)
    if not dotted_req_host.startswith("."):
        dotted_req_host = "."+dotted_req_host
    if not dotted_erhn.startswith("."):
        dotted_erhn = "."+dotted_erhn

    # Anchor the cookie domain at a label boundary as well.  Without this,
    # a host-only cookie domain such as "example.com" is a string suffix of
    # ".badexample.com", and the cookie would be sent to an unrelated host.
    if domain and not domain.startswith("."):
        dotted_domain = "."+domain
    else:
        dotted_domain = domain

    if not (dotted_req_host.endswith(dotted_domain) or
            dotted_erhn.endswith(dotted_domain)):
        #debug(" request domain %s does not match cookie domain %s",
        #      req_host, domain)
        return False

    if self.is_blocked(domain):
        debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        debug(" domain %s is not in user allow-list", domain)
        return False

    return True
959
def path_return_ok(self, path, request):
    """Return True if the request path path-matches the cookie path.

    A match requires the request path to equal the cookie path, or to
    start with it at a "/" boundary (either the cookie path ends with "/"
    or the next character of the request path is "/").  A bare prefix test
    would wrongly let a cookie for "/foo" be sent to "/foobar".

    """
    debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    if req_path == path:
        return True
    if req_path.startswith(path):
        boundary = len(path)
        if path.endswith("/") or req_path[boundary:boundary+1] == "/":
            return True
    debug(" %s does not path-match %s", req_path, path)
    return False
967
968
def vals_sorted_by_key(adict):
    """Return a list of adict's values, ordered by their sorted keys."""
    # sorted() copes with any iterable of keys, so this no longer relies on
    # keys() returning a list that can be sorted in place.
    return [adict.get(key) for key in sorted(adict.keys())]
973
class MappingIterator:
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Anything with an "items" attribute is treated as a nested mapping and
    descended into; everything else is yielded as a leaf.

    """
    def __init__(self, mapping):
        # LIFO stack of (values, next index, owning mapping) frames
        self._s = [(vals_sorted_by_key(mapping), 0, None)]

    def __iter__(self):
        return self

    def next(self):
        """Return the next leaf value, raising StopIteration at the end."""
        stack = self._s
        while True:
            try:
                values, index, owner = stack.pop()
            except IndexError:
                raise StopIteration()
            if index >= len(values):
                # this level is exhausted: resume the enclosing one
                continue
            current = values[index]
            stack.append((values, index + 1, owner))
            try:
                current.items
            except AttributeError:
                # non-mapping: this is a leaf
                return current
            # mapping: descend into it on the next pass
            stack.append((vals_sorted_by_key(current), 0, current))
1002
1003
# Sentinel used as the second parameter to the dict.get method, to
# distinguish an absent dict key from one that is present with a None value.
# Only the class object itself is compared against (with "is").
class Absent: pass
1007
class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try mechanize.urlopen().

    The major methods are extract_cookies and add_cookie_header; these are all
    you are likely to need.

    CookieJar supports the iterator protocol:

    for cookie in cookiejar:
        # do something with cookie

    Methods:

    add_cookie_header(request)
    extract_cookies(response, request)
    get_policy()
    set_policy(policy)
    cookies_for_request(request)
    make_cookies(response, request)
    set_cookie_if_ok(cookie, request)
    set_cookie(cookie)
    clear_session_cookies()
    clear_expired_cookies()
    clear(domain=None, path=None, name=None)

    Public attributes

    policy: CookiePolicy object

    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    def __init__(self, policy=None):
        """
        See CookieJar.__doc__ for argument documentation.

        """
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        # nested mapping: domain -> path -> name -> cookie
        self._cookies = {}

        # for __getitem__ iteration in pre-2.2 Pythons
        self._prev_getitem_index = 0

    def get_policy(self):
        """Return the policy object currently in use."""
        return self._policy

    def set_policy(self, policy):
        """Replace the policy object."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return the cookies stored under domain that the policy allows.

        The policy is asked, in order, about the domain, each stored path
        and finally each individual cookie.

        """
        if not self._policy.domain_return_ok(domain, request):
            return []
        debug("Checking %s for cookies to return", domain)
        cookies = []
        cookies_by_path = self._cookies[domain]
        for path in cookies_by_path.keys():
            if not self._policy.path_return_ok(path, request):
                continue
            cookies_by_name = cookies_by_path[path]
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    debug(" not returning cookie")
                    continue
                debug(" it's a match")
                cookies.append(cookie)
        return cookies

    def cookies_for_request(self, request):
        """Return a list of cookies to be returned to server.

        The returned list of cookie instances is sorted in the order they
        should appear in the Cookie: header for return to the server.

        See add_cookie_header.__doc__ for the interface required of the
        request argument.

        New in version 0.1.10

        """
        self._policy._now = self._now = int(time.time())
        cookies = self._cookies_for_request(request)
        # add cookies in order of most specific (i.e. longest) path first;
        # the sort is stable, so equal-length paths keep their order
        cookies.sort(key=lambda cookie: len(cookie.path), reverse=True)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        # this method still exists (alongside cookies_for_request) because it
        # is part of an implied protected interface for subclasses of cookiejar
        # XXX document that implied interface, or provide another way of
        # implementing cookiejars than subclassing
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        The $Version attribute is also added when appropriate (currently only
        once per request).

        >>> jar = CookieJar()
        >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False,
        ...                    "example.com", False, False,
        ...                    "/", False, False, None, True,
        ...                    None, None, {})
        >>> jar._cookie_attrs([ns_cookie])
        ['foo="bar"']
        >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False,
        ...                         ".example.com", True, False,
        ...                         "/", False, False, None, True,
        ...                         None, None, {})
        >>> jar._cookie_attrs([rfc2965_cookie])
        ['$Version=1', 'foo=bar', '$Domain="example.com"']

        """
        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            #  different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            #  RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (mechanize.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        The request object (usually a mechanize.Request instance) must support
        the methods get_full_url, get_host, is_unverifiable, get_type,
        has_header, get_header, header_items and add_unredirected_header, as
        documented by urllib2.
        """
        debug("add_cookie_header")
        cookies = self.cookies_for_request(request)

        attrs = self._cookie_attrs(cookies)
        if attrs:
            if not request.has_header("Cookie"):
                request.add_unredirected_header("Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        if self._policy.rfc2965 and not self._policy.hide_cookie2:
            for cookie in cookies:
                if cookie.version != 1 and not request.has_header("Cookie2"):
                    request.add_unredirected_header("Cookie2", '$Version="1"')
                    break

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        debug(" missing or invalid value for expires "
                              "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    if v is None:
                        debug(" missing value for max-age attribute")
                        bad_cookie = True
                        break
                    try:
                        v = int(v)
                    except ValueError:
                        debug(" missing or invalid (non-numeric) value for "
                              "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age
                    #   is a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ["port", "comment", "commenturl"]):
                        debug(" missing value for %s attribute" % k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, or return None.

        tup is a (name, value, standard, rest) tuple as produced by
        _normalized_cookie_tuples.  Missing path, domain and port
        cookie-attributes are defaulted from the request.  None is returned
        when the version cookie-attribute is not an integer.

        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None:
            try:
                version = int(version)
            except ValueError:
                return None  # invalid version, ignore cookie
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0: path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host_lc(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "."+domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present.  Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Return the Cookie objects that can be built from attrs_set."""
        cookie_tuples = self._normalized_cookie_tuples(attrs_set)

        cookies = []
        for tup in cookie_tuples:
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie: cookies.append(cookie)
        return cookies

    def _process_rfc2109_cookies(self, cookies):
        """Flag version 1 cookies as RFC 2109, optionally downgrading them.

        When policy.rfc2109_as_netscape is None the decision follows the
        policy: downgrade to version 0 unless RFC 2965 handling is on.

        """
        if self._policy.rfc2109_as_netscape is None:
            rfc2109_as_netscape = not self._policy.rfc2965
        else:
            rfc2109_as_netscape = self._policy.rfc2109_as_netscape
        for cookie in cookies:
            if cookie.version == 1:
                cookie.rfc2109 = True
                if rfc2109_as_netscape:
                    # treat 2109 cookies as Netscape cookies rather than
                    # as RFC2965 cookies
                    cookie.version = 0

    def _make_cookies(self, response, request):
        """Return the list of cookies carried by the response headers."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        # The bare excepts below are deliberate: reraise_unmasked_exceptions
        # decides which exceptions propagate; anything else means "no
        # cookies from this header".
        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except:
            reraise_unmasked_exceptions()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except:
                reraise_unmasked_exceptions()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                lookup = {}
                for cookie in cookies:
                    lookup[(cookie.domain, cookie.path, cookie.name)] = None

                ns_cookies = [
                    ns_cookie for ns_cookie in ns_cookies
                    if (ns_cookie.domain, ns_cookie.path,
                        ns_cookie.name) not in lookup]

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object.

        See extract_cookies.__doc__ for the interface required of the
        response and request arguments.

        """
        self._policy._now = self._now = int(time.time())
        return [cookie for cookie in self._make_cookies(response, request)
                if cookie.expires is None or not cookie.expires <= self._now]

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so.

        cookie: mechanize.Cookie instance
        request: see extract_cookies.__doc__ for the required interface

        """
        self._policy._now = self._now = int(time.time())

        if self._policy.set_ok(cookie, request):
            self.set_cookie(cookie)

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set.

        cookie: mechanize.Cookie instance
        """
        # create the domain and path levels on demand
        cookies_by_path = self._cookies.setdefault(cookie.domain, {})
        cookies_by_name = cookies_by_path.setdefault(cookie.path, {})
        cookies_by_name[cookie.name] = cookie

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request.

        Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
        object passed as argument.  Any of these headers that are found are
        used to update the state of the object (subject to the policy.set_ok
        method's approval).

        The response object (usually be the result of a call to
        mechanize.urlopen, or similar) should support an info method, which
        returns a mimetools.Message object (in fact, the 'mimetools.Message
        object' may be any object that provides a getheaders method).

        The request object (usually a mechanize.Request instance) must support
        the methods get_full_url, get_type, get_host, and is_unverifiable, as
        documented by mechanize, and the port attribute (the port number).  The
        request is used to set default values for cookie-attributes as well as
        for checking that the cookie is OK to be set.

        """
        debug("extract_cookies: %s", response.info())
        self._policy._now = self._now = int(time.time())

        for cookie in self._make_cookies(response, request):
            if cookie.expires is not None and cookie.expires <= self._now:
                # Expiry date in past is request to delete cookie.  This can't
                # be in DefaultCookiePolicy, because can't delete cookies there.
                try:
                    self.clear(cookie.domain, cookie.path, cookie.name)
                except KeyError:
                    pass
                debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                      cookie.domain, cookie.path, cookie.name)
            elif self._policy.set_ok(cookie, request):
                debug(" setting cookie: %s", cookie)
                self.set_cookie(cookie)

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Discards all cookies held by object which had either no Max-Age or
        Expires cookie-attribute or an explicit Discard cookie-attribute, or
        which otherwise have ended up with a true discard attribute.  For
        interactive browsers, the end of a session usually corresponds to
        closing the browser window.

        Note that the save method won't save session cookies anyway, unless you
        ask otherwise by passing a true ignore_discard argument.

        """
        for cookie in self:
            if cookie.discard:
                self.clear(cookie.domain, cookie.path, cookie.name)

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the save
        method won't save expired cookies anyway (unless you ask otherwise by
        passing a true ignore_expires argument).

        """
        now = time.time()
        for cookie in self:
            if cookie.is_expired(now):
                self.clear(cookie.domain, cookie.path, cookie.name)

    def __getitem__(self, i):
        """Support old-style sequential iteration (indices 0, 1, 2, ...).

        Index 0 restarts the iteration.  Any index that does not directly
        follow the previously requested one raises IndexError, as does
        running off the end.

        """
        if i == 0:
            self._getitem_iterator = self.__iter__()
        elif (self._prev_getitem_index != i-1 or
              getattr(self, "_getitem_iterator", None) is None):
            # The iterator only exists once index 0 has been requested;
            # without this check a fresh jar indexed at 1 would fail with
            # AttributeError instead of IndexError.
            raise IndexError(
                "CookieJar.__getitem__ only supports sequential iteration")
        self._prev_getitem_index = i
        try:
            return self._getitem_iterator.next()
        except StopIteration:
            raise IndexError()

    def __iter__(self):
        return MappingIterator(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        i = 0
        for cookie in self: i = i + 1
        return i

    def __repr__(self):
        r = []
        for cookie in self: r.append(repr(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = []
        for cookie in self: r.append(str(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))
1628
1629
# Raised by FileCookieJar.load (and so by revert) when the named file is not
# in the format understood by the cookie jar class.
class LoadError(Exception): pass
1631
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    Additional methods

    save(filename=None, ignore_discard=False, ignore_expires=False)
    load(filename=None, ignore_discard=False, ignore_expires=False)
    revert(filename=None, ignore_discard=False, ignore_expires=False)

    Additional public attributes

    filename: filename for loading and saving cookies

    Additional public readable attributes

    delayload: request that cookies are lazily loaded from disk; this is only
     a hint since this only affects performance, not behaviour (unless the
     cookies on disk are changing); a CookieJar object may ignore it (in fact,
     only MSIECookieJar lazily loads cookies at the moment)

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        See FileCookieJar.__doc__ for argument documentation.

        Cookies are NOT loaded from the named file until either the load or
        revert method is called.

        """
        CookieJar.__init__(self, policy)
        filename_given = filename is not None
        if filename_given and not isstringlike(filename):
            raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        filename: name of file in which to save cookies
        ignore_discard: save even cookies set to be discarded
        ignore_expires: save even cookies that have expired

        The file is overwritten if it already exists, thus wiping all its
        cookies.  Saved cookies can be restored later using the load or revert
        methods.  If filename is not specified, self.filename is used; if
        self.filename is None, ValueError is raised.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Old cookies are kept unless overwritten by newly loaded ones.

        Arguments are as for .save().

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised.  The named file must be in the format
        understood by the class, or LoadError will be raised.  This format will
        be identical to that written by the save method, unless the load format
        is not sufficiently well understood (as is the case for MSIECookieJar).

        """
        if filename is None:
            # fall back on the name the jar was created with
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        cookie_file = open(filename)
        try:
            self._really_load(
                cookie_file, filename, ignore_discard, ignore_expires)
        finally:
            cookie_file.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        # keep a copy of the current cookies so a failed load can be undone
        saved_cookies = copy.deepcopy(self._cookies)
        self._cookies = {}
        try:
            self.load(filename, ignore_discard, ignore_expires)
        except (LoadError, IOError):
            self._cookies = saved_cookies
            raise
OLDNEW
« no previous file with comments | « Tools/Scripts/webkitpy/thirdparty/mechanize/_beautifulsoup.py ('k') | Tools/Scripts/webkitpy/thirdparty/mechanize/_debug.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698