Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(484)

Side by Side Diff: Tools/Scripts/webkitpy/thirdparty/mechanize/_form.py

Issue 18418010: Check in the thirdparty libs needed for webkitpy. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 """HTML form handling for web clients.
2
3 HTML form handling for web clients: useful for parsing HTML forms, filling them
4 in and returning the completed forms to the server. This code developed from a
5 port of Gisle Aas' Perl module HTML::Form, from the libwww-perl library, but
6 the interface is not the same.
7
8 The most useful docstring is the one for HTMLForm.
9
10 RFC 1866: HTML 2.0
11 RFC 1867: Form-based File Upload in HTML
12 RFC 2388: Returning Values from Forms: multipart/form-data
13 HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
14 HTML 4.01 Specification, W3C Recommendation 24 December 1999
15
16
17 Copyright 2002-2007 John J. Lee <jjl@pobox.com>
18 Copyright 2005 Gary Poster
19 Copyright 2005 Zope Corporation
20 Copyright 1998-2000 Gisle Aas.
21
22 This code is free software; you can redistribute it and/or modify it
23 under the terms of the BSD or ZPL 2.1 licenses (see the file
24 COPYING.txt included with the distribution).
25
26 """
27
28 # TODO:
29 # Clean up post the merge into mechanize
30 # * Remove code that was duplicated in ClientForm and mechanize
31 # * Remove weird import stuff
32 # * Remove pre-Python 2.4 compatibility cruft
33 # * Clean up tests
34 # * Later release: Remove the ClientForm 0.1 backwards-compatibility switch
35 # Remove parser testing hack
36 # Clean action URI
37 # Switch to unicode throughout
38 # See Wichert Akkerman's 2004-01-22 message to c.l.py.
39 # Apply recommendations from google code project CURLIES
40 # Apply recommendations from HTML 5 spec
41 # Add charset parameter to Content-type headers? How to find value??
42 # Functional tests to add:
43 # Single and multiple file upload
44 # File upload with missing name (check standards)
45 # mailto: submission & enctype text/plain??
46
47 # Replace by_label etc. with moniker / selector concept. Allows, e.g., a
48 # choice between selection by value / id / label / element contents. Or
49 # choice between matching labels exactly or by substring. etc.
50
51
# Names exported by "from <module> import *".  Note that ParseString,
# RobustFormParser and NestingRobustFormParser are defined in this module but
# are not listed here.
__all__ = ['AmbiguityError', 'CheckboxControl', 'Control',
           'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm',
           'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl',
           'Item', 'ItemCountError', 'ItemNotFoundError', 'Label',
           'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile',
           'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl',
           'RadioControl', 'ScalarControl', 'SelectControl',
           'SubmitButtonControl', 'SubmitControl', 'TextControl',
           'TextareaControl', 'XHTMLCompatibleFormParser']
61
62 import HTMLParser
63 from cStringIO import StringIO
64 import inspect
65 import logging
66 import random
67 import re
68 import sys
69 import urllib
70 import urlparse
71 import warnings
72
73 import _beautifulsoup
74 import _request
75
76 # from Python itself, for backwards compatibility of raised exceptions
77 import sgmllib
78 # bundled copy of sgmllib
79 import _sgmllib_copy
80
81
VERSION = "0.2.11"

CHUNK = 1024  # size of chunks fed to parser, in bytes

# Default for the "encoding" argument of unescape(), the form parser classes
# and the Parse* functions in this module.
DEFAULT_ENCODING = "latin-1"

_logger = logging.getLogger("mechanize.forms")
# While true, debug() returns immediately without inspecting the stack or
# logging anything; _show_debug_messages() sets it to False.
OPTIMIZATION_HACK = True
90
def debug(msg, *args, **kwds):
    """Log msg at DEBUG level, prefixed with the name of the calling function.

    Does nothing at all while OPTIMIZATION_HACK is true, which avoids the
    cost of inspecting the stack.

    msg: logging format string
    args, kwds: passed on to the logger's debug() method after the caller name
    """
    if not OPTIMIZATION_HACK:
        # frame 1 is our caller; item 3 of a stack record is the function name
        caller = inspect.stack()[1][3]
        _logger.debug('%%s %s' % msg, caller, *args, **kwds)
99
def _show_debug_messages():
    """Turn on debug() output and send it to standard output.

    Clears OPTIMIZATION_HACK, sets the module logger to DEBUG level and
    attaches a new DEBUG-level stream handler writing to sys.stdout.
    """
    global OPTIMIZATION_HACK
    OPTIMIZATION_HACK = False
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.DEBUG)
    _logger.setLevel(logging.DEBUG)
    _logger.addHandler(stdout_handler)
107
108
def deprecation(message, stack_offset=0):
    """Issue a DeprecationWarning carrying message.

    stack_offset: number of extra stack frames to skip, on top of the base
     stacklevel of 3, when attributing the warning to a caller
    """
    level = 3 + stack_offset
    warnings.warn(message, category=DeprecationWarning, stacklevel=level)
111
112
class Missing:
    """Marker class; it defines no attributes or behaviour of its own."""
114
_compress_re = re.compile(r"\s+")


def compress_text(text):
    """Strip text and collapse each internal run of whitespace to one space."""
    stripped = text.strip()
    return _compress_re.sub(" ", stripped)
117
def normalize_line_endings(text):
    """Return text with every lone LF or lone CR replaced by CRLF.

    Existing CRLF pairs are left untouched.
    """
    lone_line_ending = r"(?:(?<!\r)\n)|(?:\r(?!\n))"
    return re.sub(lone_line_ending, "\r\n", text)
120
121
def unescape(data, entities, encoding=DEFAULT_ENCODING):
    """Replace entity and numeric character references found in data.

    data: string to process; returned unchanged if it is None or contains
     no "&"
    entities: mapping from complete reference text (e.g. "&amp;") to its
     replacement
    encoding: codec used to encode replacements that are not plain strings,
     and passed to unescape_charref() for numeric references

    References with no entry in entities, and replacements that cannot be
    encoded, are left in the text as they were written.
    """
    if data is None or "&" not in data:
        return data

    def substitute(match):
        ref = match.group()
        if ref[1] == "#":
            # numeric reference: hand over the text between "&#" and ";"
            return unescape_charref(ref[2:-1], encoding)

        replacement = entities.get(ref)
        if replacement is None:
            return ref
        if type(replacement) is not str:
            try:
                replacement = replacement.encode(encoding)
            except UnicodeError:
                return ref
        return replacement

    return re.sub(r"&#?[A-Za-z0-9]+?;", substitute, data)
144
def unescape_charref(data, encoding):
    """Decode the body of a numeric character reference.

    data: the text between "&#" and ";", e.g. "65", "x41" or "X41"
    encoding: codec used to encode the resulting character; if None, the
     unicode character itself is returned

    The reference is returned as written ("&#<data>;") if data is not a valid
    number, if it is outside the range unichr() accepts, or if the character
    cannot be represented in encoding.
    """
    name, base = data, 10
    # accept an upper-case hexadecimal prefix too ("&#X41;")
    if name[:1] in ("x", "X"):
        name, base = name[1:], 16
    try:
        uc = unichr(int(name, base))
    except (ValueError, OverflowError):
        # malformed markup should not abort parsing of the whole document
        return "&#%s;" % data
    if encoding is None:
        return uc
    try:
        return uc.encode(encoding)
    except UnicodeError:
        return "&#%s;" % data
158
def get_entitydefs():
    """Return a dict mapping "&name;" to the unicode character it denotes.

    Built from htmlentitydefs.name2codepoint where that exists; otherwise
    from htmlentitydefs.entitydefs, whose values are latin-1 characters or
    "&#NNN;" character references.
    """
    import htmlentitydefs
    from codecs import latin_1_decode

    entitydefs = {}
    if hasattr(htmlentitydefs, "name2codepoint"):
        for name, codepoint in htmlentitydefs.name2codepoint.items():
            entitydefs["&%s;" % name] = unichr(codepoint)
        return entitydefs

    # older htmlentitydefs: only the entitydefs table is available
    for name, char in htmlentitydefs.entitydefs.items():
        uc = latin_1_decode(char)[0]
        if uc.startswith("&#") and uc.endswith(";"):
            uc = unescape_charref(uc[2:-1], None)
        entitydefs["&%s;" % name] = uc
    return entitydefs
176
177
def issequence(x):
    """Return True if x can be indexed with the integer 0.

    An IndexError counts as success (an empty sequence); a TypeError or
    KeyError means x is not treated as a sequence.
    """
    try:
        x[0]
    except IndexError:
        # indexable, merely empty
        return True
    except (TypeError, KeyError):
        return False
    return True
186
def isstringlike(x):
    """Return True if x can be concatenated with a string (x + "")."""
    try:
        x + ""
    except Exception:
        # Any failure of the concatenation means "not string-like".  A bare
        # "except:" here would also swallow KeyboardInterrupt / SystemExit.
        return False
    return True
191
192
def choose_boundary():
    """Return a string usable as a multipart boundary.

    The result is 27 dashes followed by the decimal digits of three random
    integers.
    """
    # follow IE and firefox
    upper = sys.maxint - 1
    digits = [str(random.randint(0, upper)) for _ in range(3)]
    return "-" * 27 + "".join(digits)
198
# This copy of MimeWriter, cut-and-pasted from the standard library, is here
# so that headers can be added to the HTTP headers rather than to the message
# body when appropriate.  It also uses \r\n in place of \n.  This is a bit
# nasty.
class MimeWriter:

    """Generic MIME writer.

    Methods:

    __init__()
    addheader()
    flushheaders()
    startbody()
    startmultipartbody()
    nextpart()
    lastpart()

    A MIME writer is much more primitive than a MIME parser.  It
    doesn't seek around on the output file, and it doesn't use large
    amounts of buffer space, so you have to write the parts in the
    order they should occur on the output file.  It does buffer the
    headers you add, allowing you to rearrange their order.

    General usage is:

    f = <open the output file>
    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    followed by either:

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or:

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter's methods to create the subpart...
    w.lastpart()

    The subwriter is another MimeWriter instance, and should be
    treated in the same way as the toplevel MimeWriter.  This way,
    writing recursive body parts is easy.

    Warning: don't forget to call lastpart()!

    XXX There should be more state so calls made in the wrong order
    are detected.

    Some special cases:

    - startbody() just returns the file passed to the constructor;
      but don't use this knowledge, as it may be changed.

    - startmultipartbody() actually returns a file as well;
      this can be used to write the initial 'if you can read this your
      mailer is not MIME-aware' message.

    - If you call flushheaders(), the headers accumulated so far are
      written out (and forgotten); this is useful if you don't need a
      body part at all, e.g. for a subpart of type message/rfc822
      that's (mis)used to store some header-like information.

    - Passing a keyword argument 'prefix=<flag>' to addheader(),
      start*body() affects where the header is inserted; 0 means
      append at the end, 1 means insert at the start; default is
      append for addheader(), but insert for start*body(), which use
      it to determine where the Content-type header goes.

    - Passing a true 'add_to_http_hdrs' to addheader() or start*body()
      appends the header, as a (name, value) pair, to the http_hdrs list
      given to the constructor instead of writing it to the file.

    """

    def __init__(self, fp, http_hdrs=None):
        """
        fp: object with write() and writelines() methods receiving the output
        http_hdrs: list to which (name, value) pairs are appended for headers
         added with add_to_http_hdrs true
        """
        self._http_hdrs = http_hdrs
        self._fp = fp
        self._headers = []
        self._boundary = []
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer a header, or append it to the HTTP header list.

        Leading and trailing empty lines of value are dropped.

        prefix is ignored if add_to_http_hdrs is true.
        """
        lines = value.split("\r\n")
        while lines and not lines[-1]: del lines[-1]
        while lines and not lines[0]: del lines[0]
        if add_to_http_hdrs:
            # HTTP header values are passed on as a single line
            value = "".join(lines)
            # 2.2 urllib2 doesn't normalize header case
            self._http_hdrs.append((key.capitalize(), value))
        else:
            # continuation lines are re-indented
            for i in range(1, len(lines)):
                lines[i] = " " + lines[i].strip()
            value = "\r\n".join(lines) + "\r\n"
            line = key.title() + ": " + value
            if prefix:
                self._headers.insert(0, line)
            else:
                self._headers.append(line)

    def flushheaders(self):
        """Write the buffered headers to the file and forget them."""
        self._fp.writelines(self._headers)
        self._headers = []

    def startbody(self, ctype=None, plist=None, prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Emit the Content-Type header (if any), flush headers, return file.

        ctype: content type; no Content-Type header is added if this or
         content_type is false
        plist: sequence of (name, value) content-type parameters
         (default: none)

        prefix is ignored if add_to_http_hdrs is true.
        """
        if content_type and ctype:
            for name, value in plist or ():
                ctype = ctype + ';\r\n %s=%s' % (name, value)
            self.addheader("Content-Type", ctype, prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        # the blank line that ends the header section
        if not add_to_http_hdrs: self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=None, prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body and return the output file.

        boundary: boundary string; one is generated by choose_boundary() if
         this is false
        plist: sequence of extra (name, value) content-type parameters, added
         after the boundary parameter (default: none)
        """
        boundary = boundary or choose_boundary()
        self._boundary.append(boundary)
        return self.startbody("multipart/" + subtype,
                              [("boundary", boundary)] + list(plist or ()),
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the delimiter opening the next part; return its writer."""
        boundary = self._boundary[-1]
        if self._first_part:
            self._first_part = False
        else:
            self._fp.write("\r\n")
        self._fp.write("--" + boundary + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing delimiter of the current multipart body."""
        if self._first_part:
            # no part was written: still emit one opening delimiter
            self.nextpart()
        boundary = self._boundary.pop()
        self._fp.write("\r\n--" + boundary + "--\r\n")
344
345
class LocateError(ValueError):
    """ValueError subclass; base of the three *Error classes defined next."""


class AmbiguityError(LocateError):
    """LocateError subclass."""


class ControlNotFoundError(LocateError):
    """LocateError subclass."""


class ItemNotFoundError(LocateError):
    """LocateError subclass."""
350
class ItemCountError(ValueError):
    """ValueError subclass (not part of the LocateError hierarchy)."""
352
353 # for backwards compatibility, ParseError derives from exceptions that were
354 # raised by versions of ClientForm <= 0.2.5
355 # TODO: move to _html
356 class ParseError(sgmllib.SGMLParseError,
357 HTMLParser.HTMLParseError):
358
359 def __init__(self, *args, **kwds):
360 Exception.__init__(self, *args, **kwds)
361
362 def __str__(self):
363 return Exception.__str__(self)
364
365
class _AbstractFormParser:
    """Parser-independent collector of FORM, control and LABEL data.

    Meant to be combined with a concrete parser class, which calls the
    start_*/end_*/do_*/handle_* methods and must provide
    unescape_attr_if_required().  On completion:

    forms: list of ((name, action, method, enctype), attrs, controls) tuples,
     one per FORM element.  forms[0] collects the controls found outside any
     FORM element.  Each item of controls is a (type, name, attrs) tuple.
    labels: attribute dicts of the LABEL elements that have a non-empty "for"
     attribute
    base: href of the most recently seen BASE element, or None
    """
    # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        """
        entitydefs: mapping like {"&amp;": "&", ...}; get_entitydefs() is
         used if None
        encoding: encoding passed to unescape() / unescape_charref()
        """
        if entitydefs is None:
            entitydefs = get_entitydefs()
        self._entitydefs = entitydefs
        self._encoding = encoding

        self.base = None
        self.forms = []
        self.labels = []
        self._current_label = None
        self._current_form = None
        self._select = None
        self._optgroup = None
        self._option = None
        self._textarea = None

        # forms[0] will contain all controls that are outside of any form
        # self._global_form is an alias for self.forms[0]
        self._global_form = None
        self.start_form([])
        self.end_form()
        self._current_form = self._global_form = self.forms[0]

    def _unescaped_attr_dict(self, attrs):
        """Return the (name, value) pairs in attrs as a dict, with values
        unescaped if the underlying parser has not already done that."""
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        return d

    def do_base(self, attrs):
        debug("%s", attrs)
        for key, value in attrs:
            if key == "href":
                self.base = self.unescape_attr_if_required(value)

    def end_body(self):
        """Close any LABEL and FORM still open at the end of the document."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is not self._global_form:
            self.end_form()

    def start_form(self, attrs):
        """Begin collecting a new form; raises ParseError if one is open."""
        debug("%s", attrs)
        if self._current_form is not self._global_form:
            raise ParseError("nested FORMs")
        unescape = self.unescape_attr_if_required
        name = None
        action = None
        enctype = "application/x-www-form-urlencoded"
        method = "GET"
        d = {}
        for key, value in attrs:
            if key == "name":
                name = unescape(value)
            elif key == "action":
                action = unescape(value)
            # A parser may report an attribute written without a value as
            # None; keep the default then rather than failing on None.upper().
            elif key == "method" and value is not None:
                method = unescape(value.upper())
            elif key == "enctype" and value is not None:
                enctype = unescape(value.lower())
            d[key] = unescape(value)
        self._current_form = (name, action, method, enctype), d, []

    def end_form(self):
        """Store the current form; raises ParseError if none is open."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is self._global_form:
            raise ParseError("end of FORM before start")
        self.forms.append(self._current_form)
        self._current_form = self._global_form

    def start_select(self, attrs):
        debug("%s", attrs)
        if self._select is not None:
            raise ParseError("nested SELECTs")
        if self._textarea is not None:
            raise ParseError("SELECT inside TEXTAREA")
        d = self._unescaped_attr_dict(attrs)

        self._select = d
        self._add_label(d)

        # recorded even if the SELECT turns out to have no OPTIONs
        self._append_select_control({"__select": d})

    def end_select(self):
        debug("")
        if self._select is None:
            raise ParseError("end of SELECT before start")

        if self._option is not None:
            self._end_option()

        self._select = None

    def start_optgroup(self, attrs):
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTGROUP outside of SELECT")
        self._optgroup = self._unescaped_attr_dict(attrs)

    def end_optgroup(self):
        debug("")
        if self._optgroup is None:
            raise ParseError("end of OPTGROUP before start")
        self._optgroup = None

    def _start_option(self, attrs):
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTION outside of SELECT")
        if self._option is not None:
            # the previous OPTION had no end tag
            self._end_option()

        self._option = self._unescaped_attr_dict(attrs)
        # an OPTION inherits "disabled" from its enclosing OPTGROUP
        if (self._optgroup and "disabled" in self._optgroup and
            "disabled" not in self._option):
            self._option["disabled"] = None

    def _end_option(self):
        debug("")
        if self._option is None:
            raise ParseError("end of OPTION before start")

        option = self._option
        contents = option.get("contents", "").strip()
        option["contents"] = contents
        # value and label both default to the element's text
        option.setdefault("value", contents)
        option.setdefault("label", contents)
        # stuff dict of SELECT HTML attrs into a special private key
        #  (gets deleted again later)
        option["__select"] = self._select
        self._append_select_control(option)
        self._option = None

    def _append_select_control(self, attrs):
        debug("%s", attrs)
        controls = self._current_form[2]
        name = self._select.get("name")
        controls.append(("select", name, attrs))

    def start_textarea(self, attrs):
        debug("%s", attrs)
        if self._textarea is not None:
            raise ParseError("nested TEXTAREAs")
        if self._select is not None:
            raise ParseError("TEXTAREA inside SELECT")
        d = self._unescaped_attr_dict(attrs)
        self._add_label(d)

        self._textarea = d

    def end_textarea(self):
        debug("")
        if self._textarea is None:
            raise ParseError("end of TEXTAREA before start")
        controls = self._current_form[2]
        name = self._textarea.get("name")
        controls.append(("textarea", name, self._textarea))
        self._textarea = None

    def start_label(self, attrs):
        debug("%s", attrs)
        if self._current_label:
            self.end_label()
        d = self._unescaped_attr_dict(attrs)
        taken = bool(d.get("for"))  # empty id is invalid
        d["__text"] = ""
        d["__taken"] = taken
        if taken:
            self.labels.append(d)
        self._current_label = d

    def end_label(self):
        debug("")
        label = self._current_label
        if label is None:
            # something is ugly in the HTML, but we're ignoring it
            return
        self._current_label = None
        # if it is staying around, it is True in all cases
        del label["__taken"]

    def _add_label(self, d):
        """Attach the open LABEL, if not yet taken, to control attrs d."""
        #debug("%s", d)
        label = self._current_label
        if label is not None and not label["__taken"]:
            label["__taken"] = True
            d["__label"] = label

    def handle_data(self, data):
        """Accumulate text inside an OPTION, TEXTAREA or LABEL element."""
        debug("%s", data)

        if self._option is not None:
            # self._option is a dictionary of the OPTION element's HTML
            # attributes, but it has two special keys, one of which is the
            # special "contents" key contains text between OPTION tags (the
            # other is the "__select" key: see the end_option method)
            target, key = self._option, "contents"
        elif self._textarea is not None:
            target, key = self._textarea, "value"
            data = normalize_line_endings(data)
        # not if within option or textarea
        elif self._current_label is not None:
            target, key = self._current_label, "__text"
        else:
            return

        if not data:
            # Nothing to add.  (Appending to a key that has not been set yet
            # used to raise KeyError here.)
            return

        if key in target:
            target[key] = target[key] + data
            return

        # First text for this element.  According to
        # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break
        # immediately after start tags or immediately before end tags must
        # be ignored, but real browsers only ignore a line break after a
        # start tag, so we'll do that.
        if data[0:2] == "\r\n":
            data = data[2:]
        elif data[0:1] in ["\n", "\r"]:
            data = data[1:]
        target[key] = data

    def do_button(self, attrs):
        debug("%s", attrs)
        d = {"type": "submit"}  # default
        d.update(self._unescaped_attr_dict(attrs))
        controls = self._current_form[2]

        name = d.get("name")
        # we don't want to lose information, so use a type string that
        # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
        # e.g. type for BUTTON/RESET is "resetbutton"
        #     (type for INPUT/RESET is "reset")
        control_type = d["type"] + "button"
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_input(self, attrs):
        debug("%s", attrs)
        d = {"type": "text"}  # default
        d.update(self._unescaped_attr_dict(attrs))
        controls = self._current_form[2]

        control_type = d["type"]
        name = d.get("name")
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_isindex(self, attrs):
        debug("%s", attrs)
        d = self._unescaped_attr_dict(attrs)
        controls = self._current_form[2]

        self._add_label(d)
        # isindex doesn't have type or name HTML attributes
        controls.append(("isindex", None, d))

    def handle_entityref(self, name):
        #debug("%s", name)
        self.handle_data(unescape(
            '&%s;' % name, self._entitydefs, self._encoding))

    def handle_charref(self, name):
        #debug("%s", name)
        self.handle_data(unescape_charref(name, self._encoding))

    def unescape_attr(self, name):
        #debug("%s", name)
        return unescape(name, self._entitydefs, self._encoding)

    def unescape_attrs(self, attrs):
        """Return a copy of dict attrs with every value unescaped.

        Values that are themselves mappings are processed recursively.
        """
        #debug("%s", attrs)
        escaped_attrs = {}
        for key, val in attrs.items():
            if hasattr(val, "items"):
                # e.g. "__select" -- yuck!
                escaped_attrs[key] = self.unescape_attrs(val)
            else:
                escaped_attrs[key] = self.unescape_attr(val)
        return escaped_attrs

    def unknown_entityref(self, ref): self.handle_data("&%s;" % ref)
    def unknown_charref(self, ref): self.handle_data("&#%s;" % ref)
675
676
class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
    """Good for XHTML, bad for tolerance of incorrect HTML.

    Form parser built on HTMLParser.HTMLParser.  Start and end tags are
    dispatched to start_<tag> / do_<tag> / end_<tag> methods where those
    exist; other tags are ignored.
    """
    # thanks to Michael Howitz for this!
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        HTMLParser.HTMLParser.__init__(self)
        _AbstractFormParser.__init__(self, entitydefs, encoding)

    def feed(self, data):
        """Feed data to the parser, re-raising parse errors as ParseError."""
        try:
            HTMLParser.HTMLParser.feed(self, data)
        except HTMLParser.HTMLParseError:
            raise ParseError(sys.exc_info()[1])

    def start_option(self, attrs):
        _AbstractFormParser._start_option(self, attrs)

    def end_option(self):
        _AbstractFormParser._end_option(self)

    def handle_starttag(self, tag, attrs):
        # start_<tag> takes precedence over do_<tag>
        handler = getattr(self, "start_" + tag, None)
        if handler is None:
            handler = getattr(self, "do_" + tag, None)
        if handler is not None:
            handler(attrs)
        # else: unknown tag

    def handle_endtag(self, tag):
        handler = getattr(self, "end_" + tag, None)
        if handler is not None:
            handler()
        # else: unknown tag

    def unescape(self, name):
        # Use the entitydefs passed into constructor, not
        # HTMLParser.HTMLParser's entitydefs.
        return self.unescape_attr(name)

    def unescape_attr_if_required(self, name):
        return name  # HTMLParser.HTMLParser already did it

    def unescape_attrs_if_required(self, attrs):
        return attrs  # ditto

    def close(self):
        """Finish parsing, then close any LABEL / FORM left open."""
        HTMLParser.HTMLParser.close(self)
        self.end_body()
730
731
class _AbstractSgmllibParser(_AbstractFormParser):
    """_AbstractFormParser pieces shared by the sgmllib-style parsers."""

    def do_option(self, attrs):
        _AbstractFormParser._start_option(self, attrs)

    # we override this attr to decode hex charrefs
    entity_or_charref = re.compile(
        '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')

    def convert_entityref(self, name):
        entity = "&%s;" % name
        return unescape(entity, self._entitydefs, self._encoding)

    def convert_charref(self, name):
        charref = "%s" % name
        return unescape_charref(charref, self._encoding)

    def unescape_attr_if_required(self, name):
        return name  # sgmllib already did it

    def unescape_attrs_if_required(self, attrs):
        return attrs  # ditto
748
749
class FormParser(_AbstractSgmllibParser, _sgmllib_copy.SGMLParser):
    """Good for tolerance of incorrect HTML, bad for XHTML.

    Form parser built on the bundled copy of sgmllib's SGMLParser.
    """
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        _sgmllib_copy.SGMLParser.__init__(self)
        _AbstractFormParser.__init__(self, entitydefs, encoding)

    def feed(self, data):
        """Feed data to the parser, re-raising parse errors as ParseError."""
        try:
            _sgmllib_copy.SGMLParser.feed(self, data)
        except _sgmllib_copy.SGMLParseError:
            raise ParseError(sys.exc_info()[1])

    def close(self):
        """Finish parsing, then close any LABEL / FORM left open."""
        _sgmllib_copy.SGMLParser.close(self)
        self.end_body()
765
766
class _AbstractBSFormParser(_AbstractSgmllibParser):
    """Base for form parsers built on a BeautifulSoup class.

    Subclasses set bs_base_class to the BeautifulSoup class they also
    inherit from; parsing calls are forwarded to that class.
    """

    bs_base_class = None

    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        _AbstractFormParser.__init__(self, entitydefs, encoding)
        self.bs_base_class.__init__(self)

    def handle_data(self, data):
        # record the text for the form first, then let the soup have it too
        _AbstractFormParser.handle_data(self, data)
        self.bs_base_class.handle_data(self, data)

    def feed(self, data):
        """Feed data to the parser, re-raising parse errors as ParseError."""
        try:
            self.bs_base_class.feed(self, data)
        except _sgmllib_copy.SGMLParseError:
            raise ParseError(sys.exc_info()[1])

    def close(self):
        """Finish parsing, then close any LABEL / FORM left open."""
        self.bs_base_class.close(self)
        self.end_body()
788
789
class RobustFormParser(_AbstractBSFormParser, _beautifulsoup.BeautifulSoup):

    """Tries to be highly tolerant of incorrect HTML.

    Form parser built on _beautifulsoup.BeautifulSoup.
    """

    bs_base_class = _beautifulsoup.BeautifulSoup
795
796
class NestingRobustFormParser(_AbstractBSFormParser,
                              _beautifulsoup.ICantBelieveItsBeautifulSoup):

    """Tries to be highly tolerant of incorrect HTML.

    Form parser built on _beautifulsoup.ICantBelieveItsBeautifulSoup.
    Different from RobustFormParser in that it more often guesses nesting
    above missing end tags (see BeautifulSoup docs).
    """

    bs_base_class = _beautifulsoup.ICantBelieveItsBeautifulSoup
807
808
809 #FormParser = XHTMLCompatibleFormParser # testing hack
810 #FormParser = RobustFormParser # testing hack
811
812
def ParseResponseEx(response,
                    select_default=False,
                    form_parser_class=FormParser,
                    request_class=_request.Request,
                    entitydefs=None,
                    encoding=DEFAULT_ENCODING,

                    # private
                    _urljoin=urlparse.urljoin,
                    _urlparse=urlparse.urlparse,
                    _urlunparse=urlparse.urlunparse,
                    ):
    """Parse an HTTP response; like ParseResponse, with these differences:

    1. The returned list contains an extra item.  The first form in the list
       contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.

    The base URI is taken from response.geturl().
    """
    return _ParseFileEx(response, response.geturl(),
                        select_default=select_default,
                        ignore_errors=False,
                        form_parser_class=form_parser_class,
                        request_class=request_class,
                        entitydefs=entitydefs,
                        backwards_compat=False,
                        encoding=encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
846
def ParseFileEx(file, base_uri,
                select_default=False,
                form_parser_class=FormParser,
                request_class=_request.Request,
                entitydefs=None,
                encoding=DEFAULT_ENCODING,

                # private
                _urljoin=urlparse.urljoin,
                _urlparse=urlparse.urlparse,
                _urlunparse=urlparse.urlunparse,
                ):
    """Parse HTML from a file; like ParseFile, with these differences:

    1. The returned list contains an extra item.  The first form in the list
       contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.
    """
    return _ParseFileEx(file, base_uri,
                        select_default=select_default,
                        ignore_errors=False,
                        form_parser_class=form_parser_class,
                        request_class=request_class,
                        entitydefs=entitydefs,
                        backwards_compat=False,
                        encoding=encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
880
def ParseString(text, base_uri, *args, **kwds):
    """Parse HTML held in the string text.

    The string is wrapped in a StringIO and handed to ParseFileEx, which
    also receives base_uri and any further arguments; see ParseFileEx for
    the return value.
    """
    return ParseFileEx(StringIO(text), base_uri, *args, **kwds)
884
def ParseResponse(response, *args, **kwds):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of mechanize.urlopen can be conveniently passed to this
    function as the response parameter.

    mechanize.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    form_parser_class: class to instantiate and use to parse the HTML
    request_class: class to return from .click() method (default is
     mechanize.Request)
    entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
     definitions (a sensible default is used)
    encoding: character encoding used for encoding numeric character
     references and entity replacement text.  No attempt is made here to find
     the encoding in a META HTTP-EQUIV attribute in the document itself;
     callers that know the document's encoding should pass it using this
     parameter.

    backwards_compat: boolean that determines whether the returned HTMLForm
     objects are backwards-compatible with old code.  It defaults to true,
     and a DeprecationWarning is issued whenever it is true.  If
     backwards_compat is true:

     - ClientForm 0.1 code will continue to work as before.

     - Label searches that do not specify a nr (number or count) will always
       get the first match, even if other controls match.  If
       backwards_compat is False, label searches that have ambiguous results
       will raise an AmbiguityError.

     - Item label matching is done by strict string comparison rather than
       substring matching.

     - De-selecting individual list items is allowed even if the Item is
       disabled.

    The backwards_compat argument will be removed in a future release.

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    There is a choice of parsers.  mechanize.XHTMLCompatibleFormParser (uses
    HTMLParser.HTMLParser) works best for XHTML, mechanize.FormParser (uses
    bundled copy of sgmllib.SGMLParser) (the default) works better for ordinary
    grubby HTML.  Note that HTMLParser is only available in Python 2.2 and
    later.  You can pass your own class in here as a hack to work around bad
    HTML, but at your own risk: there is no well-defined interface.

    """
    all_forms = _ParseFileEx(response, response.geturl(), *args, **kwds)
    # drop the leading pseudo-form holding controls outside any FORM element
    return all_forms[1:]
946
def ParseFile(file, base_uri, *args, **kwds):
    """Parse HTML and return a list of HTMLForm instances.

    mechanize.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the URI of the document (note that the base URI used to submit
     the form will be that given in the BASE element if present, not that of
     the document)

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    all_forms = _ParseFileEx(file, base_uri, *args, **kwds)
    # drop the leading pseudo-form holding controls outside any FORM element
    return all_forms[1:]
962
def _ParseFileEx(file, base_uri,
                 select_default=False,
                 ignore_errors=False,
                 form_parser_class=FormParser,
                 request_class=_request.Request,
                 entitydefs=None,
                 backwards_compat=True,
                 encoding=DEFAULT_ENCODING,
                 _urljoin=urlparse.urljoin,
                 _urlparse=urlparse.urlparse,
                 _urlunparse=urlparse.urlunparse,
                 ):
    """Parse HTML from file and build an HTMLForm for every form found.

    file: file-like object supporting read(size)
    base_uri: URI of the document; replaced by the href of a BASE element if
     the document has one
    select_default: passed to HTMLForm.new_control() for every control
    ignore_errors: accepted, but not used by this function
    form_parser_class: called as form_parser_class(entitydefs, encoding) to
     create the parser
    request_class, backwards_compat: passed to the HTMLForm constructor; a
     DeprecationWarning is issued if backwards_compat is true

    Returns the list of forms.  The first item holds the controls that are
    not inside any FORM element.

    A ParseError raised by the parser is re-raised with its base_uri
    attribute set.
    """
    if backwards_compat:
        deprecation("operating in backwards-compatibility mode", 1)
    fp = form_parser_class(entitydefs, encoding)
    while True:
        data = file.read(CHUNK)
        # Only an empty read means end of file: a file-like object may
        # legitimately return fewer than CHUNK bytes before then.
        if not data:
            break
        try:
            fp.feed(data)
        except ParseError:
            sys.exc_info()[1].base_uri = base_uri
            raise
    fp.close()
    if fp.base is not None:
        # HTML BASE element takes precedence over document URI
        base_uri = fp.base

    # Label objects for the labels the parser collected, also grouped by the
    # id given in their "for" attribute
    labels = []
    id_to_labels = {}
    for label_attrs in fp.labels:
        label = Label(label_attrs)
        labels.append(label)
        id_to_labels.setdefault(label_attrs["for"], []).append(label)

    forms = []
    for (name, action, method, enctype), attrs, controls in fp.forms:
        if action is None:
            action = base_uri
        else:
            action = _urljoin(base_uri, action)
        # would be nice to make HTMLForm class (form builder) pluggable
        form = HTMLForm(
            action, method, enctype, name, attrs, request_class,
            forms, labels, id_to_labels, backwards_compat)
        form._urlparse = _urlparse
        form._urlunparse = _urlunparse
        for ii, (control_type, control_name, control_attrs) in \
                enumerate(controls):
            # index=ii*10 allows ImageControl to return multiple ordered pairs
            form.new_control(
                control_type, control_name, control_attrs,
                select_default=select_default, index=ii*10)
        forms.append(form)
    for form in forms:
        form.fixup()
    return forms
1022
1023
class Label:
    """Text label built from a dictionary of label attributes.

    attrs must provide "__text" (the label's text); "for" is optional and
    becomes the id attribute.

    The read-only text attribute gives the stripped label text while
    _backwards_compat is true, and the compress_text() version of it
    otherwise.

    """
    def __init__(self, attrs):
        self.id = attrs.get("for")
        self._text = attrs.get("__text").strip()
        self._ctext = compress_text(self._text)
        self.attrs = attrs
        self._backwards_compat = False  # maintained by HTMLForm

    def __getattr__(self, name):
        # Only reached for names that normal attribute lookup did not find.
        if name != "text":
            return getattr(Label, name)
        if self._backwards_compat:
            return self._text
        return self._ctext

    def __setattr__(self, name, value):
        if name != "text":
            self.__dict__[name] = value
            return
        # don't see any need for this, so make it read-only
        raise AttributeError("text attribute is read-only")

    def __str__(self):
        details = (self.id, self.text)
        return "<Label(id=%r, text=%r)>" % details
1048
1049
1050 def _get_label(attrs):
1051 text = attrs.get("__label")
1052 if text is not None:
1053 return Label(text)
1054 else:
1055 return None
1056
class Control:
    """Base class for HTML form controls.

    An HTMLForm holds a sequence of Controls, which are reached through the
    HTMLForm.find_control method or the HTMLForm.controls attribute.

    Controls are normally created by the ParseFile / ParseResponse functions,
    in which case the rest of this paragraph can be ignored.  A Control is
    only fully initialised once its fixup method has been called.  Strictly,
    only ListControl instances need this: a ListControl is assembled from
    ListControls that each hold a single item, so its initial value(s) cannot
    be known until the whole sequence has been seen.

    Subclasses define which types and values may be assigned to the value
    attribute.

    A true disabled attribute corresponds to the state browsers usually show
    by 'greying out' a control.  While it is true, an attempt to change the
    control's value raises AttributeError, and the control is not
    'successful' as defined by the W3C HTML 4 standard -- ie. it contributes
    no data to the return value of the HTMLForm.click* methods.  Set the
    disabled attribute to a false value to enable the control.

    While the readonly attribute is true, an attempt to change the control's
    value raises AttributeError.  Set the readonly attribute to a false value
    to make the control writable.

    Every control has the disabled and readonly attributes, not only those
    whose HTML elements may carry attributes of the same names.

    Assignment to the value attribute may raise TypeError, AttributeError (if
    the value attribute should not be assigned to, for example because the
    control is disabled) or ValueError.

    The control is not successful if its name or value attribute is None, if
    its value is an empty list, or if it is disabled.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs, index=None):
        """Subclasses must override this; the base class always raises.

        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Remember form as this control's form and append self to it."""
        self._form = form
        form.controls.append(self)

    def fixup(self):
        """Hook called once all controls are present; does nothing here."""
        pass

    def is_of_kind(self, kind):
        raise NotImplementedError()

    def clear(self):
        raise NotImplementedError()

    # Attribute access is left entirely to subclasses.
    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        result = []
        for _index, key, value in self._totally_ordered_pairs():
            result.append((key, value))
        return result

    def _totally_ordered_pairs(self):
        """Return list of (index, key, value) tuples.

        Like pairs, but the leading index allows correct ordering to be
        preserved even where several controls are involved.

        """
        raise NotImplementedError()

    def _write_mime_data(self, mw, name, value):
        """Write data for a subitem of this control to a MimeWriter."""
        # called by HTMLForm
        part = mw.nextpart()
        disposition = 'form-data; name="%s"' % name
        part.addheader("Content-Disposition", disposition, 1)
        body = part.startbody(prefix=0)
        body.write(value)

    def __str__(self):
        raise NotImplementedError()

    def get_labels(self):
        """Return all labels (Label instances) for this control.

        A label from a <label> tag surrounding the control, if there is one,
        comes first; the labels connected to the control through 'for' and
        'id' follow, in the order they appear in the HTML.

        """
        found = []
        own_label = self._label
        if own_label:
            found.append(own_label)
        if self.id:
            by_id = self._form._id_to_labels.get(self.id, ())
            found.extend(by_id)
        return found
1175
1176
1177 #---------------------------------------------------
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls accept no value attribute at all.  The others take a
    single value, which must be string-like.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs, index=None):
        self._index = index
        self._label = _get_label(attrs)
        # type and name are read-only through __setattr__, so they are
        # stored directly in the instance dictionary.
        self.__dict__["type"] = type.lower()
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        self.disabled = "disabled" in attrs
        self.readonly = "readonly" in attrs
        self.id = attrs.get("id")

        self.attrs = attrs.copy()

        self._clicked = False

        self._urlparse = urlparse.urlparse
        self._urlunparse = urlparse.urlunparse

    def __getattr__(self, name):
        # Only reached for names that normal attribute lookup did not find.
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name != "value":
            self.__dict__[name] = value
            return
        # Assigning the value: the type is checked first, then the readonly
        # state, then the disabled state.
        if not isstringlike(value):
            raise TypeError("must assign a string")
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        self.__dict__["_value"] = value

    def _totally_ordered_pairs(self):
        key = self.name
        current = self.value
        if key is not None and current is not None and not self.disabled:
            return [(self._index, key, current)]
        return []

    def clear(self):
        """Reset the value to None; raises AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self.__dict__["_value"] = None

    def __str__(self):
        shown_name = self.name
        shown_value = self.value
        if shown_name is None:
            shown_name = "<None>"
        if shown_value is None:
            shown_value = "<None>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s=%s)%s>" % (
            self.__class__.__name__, shown_name, shown_value, suffix)
1253
1254
1255 #---------------------------------------------------
class TextControl(ScalarControl):
    """Control holding text.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # Hidden controls are always made read-only.
        if self.type == "hidden":
            self.readonly = True
        # A missing value attribute becomes the empty string.
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind == "text"
1274
1275 #---------------------------------------------------
class FileControl(ScalarControl):
    """Control for file upload with INPUT TYPE=FILE.

    A FileControl's value attribute is always None.  Files are attached with
    add_file instead.

    Additional public method: add_file

    """

    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        self._value = None
        # list of (file_object, content_type, filename) tuples
        self._upload_data = []

    def is_of_kind(self, kind):
        return kind == "file"

    def clear(self):
        """Forget all added files; raises AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self._upload_data = []

    def __setattr__(self, name, value):
        if name not in ("value", "name", "type"):
            self.__dict__[name] = value
            return
        raise AttributeError("%s attribute is readonly" % name)

    def add_file(self, file_object, content_type=None, filename=None):
        """Add a file to be uploaded with this control.

        file_object: object with a read method
        content_type: None (meaning "application/octet-stream") or a
         string-like content type
        filename: None or a string-like file name

        TypeError is raised if an argument does not meet these requirements.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        if content_type is not None and not isstringlike(content_type):
            raise TypeError("content type must be None or string-like")
        if filename is not None and not isstringlike(filename):
            raise TypeError("filename must be None or string-like")
        if content_type is None:
            content_type = "application/octet-stream"
        upload = (file_object, content_type, filename)
        self._upload_data.append(upload)

    def _totally_ordered_pairs(self):
        # XXX should it be successful even if unnamed?
        if self.name is not None and not self.disabled:
            return [(self._index, self.name, "")]
        return []

    # If enctype is application/x-www-form-urlencoded and there's a FILE
    # control present, what should be sent?  Strictly, it should be 'name=data'
    # (see HTML 4.01 spec., section 17.13.2), but code sends "name=" ATM.  What
    # about multiple file upload?
    def _write_mime_data(self, mw, _name, _value):
        # called by HTMLForm
        # assert _name == self.name and _value == ''
        uploads = self._upload_data
        if len(uploads) >= 2:
            # multiple files: one multipart/mixed part holding a sub-part
            # for each file
            outer = mw.nextpart()
            outer.addheader("Content-Disposition",
                            'form-data; name="%s"' % self.name, prefix=1)
            outer.startmultipartbody("mixed", prefix=0)
            for file_object, content_type, filename in uploads:
                inner = outer.nextpart()
                if filename is None:
                    filename = ""
                inner.addheader("Content-Disposition",
                                'file; filename="%s"' % filename, prefix=1)
                body = inner.startbody(content_type, prefix=0)
                body.write(file_object.read())
            outer.lastpart()
        else:
            # zero or one file: a single part; with no file added, an empty
            # file with an empty filename is sent
            if uploads:
                file_object, content_type, filename = uploads[0]
                if filename is None:
                    filename = ""
            else:
                file_object = StringIO()
                content_type = "application/octet-stream"
                filename = ""
            part = mw.nextpart()
            disposition = 'form-data; name="%s"; filename="%s"' % (
                self.name, filename)
            part.addheader("Content-Disposition", disposition, prefix=1)
            body = part.startbody(content_type, prefix=0)
            body.write(file_object.read())

    def __str__(self):
        shown_name = self.name
        if shown_name is None:
            shown_name = "<None>"

        if self._upload_data:
            filenames = []
            for _file_object, _content_type, filename in self._upload_data:
                if filename is None:
                    filename = "<Unnamed file>"
                filenames.append(filename)
            shown_value = ", ".join(filenames)
        else:
            shown_value = "<No files added>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s=%s)%s>" % (
            self.__class__.__name__, shown_name, shown_value, suffix)
1381
1382
1383 #---------------------------------------------------
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd-one-out among HTML form controls.  It is not really
    part of regular HTML forms at all, and predates them.  Only one ISINDEX
    is allowed per HTML document.  Submitting an ISINDEX and submitting a
    regular form are mutually exclusive -- you do one or the other.

    Even so, because ISINDEX controls may appear inside forms (which is
    probably bad HTML), ParseFile / ParseResponse include them in the
    HTMLForm instances they return.  The ISINDEX's value can be set as for
    any other control (but ISINDEX controls have no name, so the type
    argument of set_value must be used!).  When the form is submitted, the
    ISINDEX is not successful (ie., its presence causes no data to be
    returned to the server), unless the ISINDEX control itself is clicked,
    in which case the ISINDEX is submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    mechanize.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  To submit one by hand,
    do this:

    url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
    result = mechanize.urlopen(url)

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # A missing value attribute becomes the empty string.
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind in ("text", "clickable")

    def _totally_ordered_pairs(self):
        # Never contributes data to a regular form submission.
        return []

    def _click(self, form, coord, return_type, request_class=_request.Request):
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
        # deprecated in 4.01, but it should still say how to submit it).
        # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
        split = self._urlparse(form.action)
        head = split[:-2]
        # The last two components of the action URL are replaced: the quoted
        # value goes in place of the first, None in place of the second.
        _old_query, _old_fragment = split[-2:]
        url = self._urlunparse(head + (urllib.quote_plus(self.value), None))

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return url, None, []
        return request_class(url)

    def __str__(self):
        shown_value = self.value
        if shown_value is None:
            shown_value = "<None>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s)%s>" % (self.__class__.__name__, shown_value, suffix)
1451
1452
1453 #---------------------------------------------------
class IgnoreControl(ScalarControl):
    """Control that is of no interest for form submission.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    In the terminology of HTML 4 these controls are always unsuccessful (ie.
    they never require any information to be returned to the server).

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # Any value attribute found in the HTML is discarded.
        self._value = None

    def is_of_kind(self, kind):
        return False

    def __setattr__(self, name, value):
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value
1486
1487
1488 #---------------------------------------------------
1489 # ListControls
1490
1491 # helpers and subsidiary classes
1492
class Item:
    """A single item of a list control.

    name is taken from the "value" HTML attribute, which must be present in
    attrs.  Creating an Item appends it to control.items.

    Only two attributes may be assigned: selected (the assignment is handed
    to the owning control's _set_selected_state) and disabled (stored as a
    bool).  Assigning anything else raises AttributeError.

    """
    def __init__(self, control, attrs, index=None):
        label = _get_label(attrs)
        if label:
            own_labels = [label]
        else:
            own_labels = []
        # __setattr__ rejects most names, so the initial state is written
        # straight into the instance dictionary.
        state = {
            "name": attrs["value"],
            "_labels": own_labels,
            "attrs": attrs,
            "_control": control,
            "disabled": "disabled" in attrs,
            "_selected": False,
            "id": attrs.get("id"),
            "_index": index,
        }
        self.__dict__.update(state)
        control.items.append(self)

    def get_labels(self):
        """Return all labels (Label instances) for this item.

        For items that represent radio buttons or checkboxes, a label from a
        <label> tag surrounding the item, if any, comes first; all other
        labels, connected by 'for' and 'id', follow in the order they appear
        in the HTML.

        For items that represent select options, a label attribute on the
        option, if any, gives the first label.  If the option has contents
        (text within the option tags) that differ from the label attribute
        (if any), they give another label.  There is nothing in the spec to
        my knowledge that makes an option with an id unable to be the target
        of a label's for attribute, so those labels are included too, if
        any, for the sake of consistency and completeness.

        """
        found = list(self._labels)
        if self.id:
            by_id = self._control._form._id_to_labels.get(self.id, ())
            found.extend(by_id)
        return found

    def __getattr__(self, name):
        # Only reached for names that normal attribute lookup did not find.
        if name != "selected":
            raise AttributeError(name)
        return self._selected

    def __setattr__(self, name, value):
        if name == "selected":
            self._control._set_selected_state(self, value)
            return
        if name == "disabled":
            self.__dict__["disabled"] = bool(value)
            return
        raise AttributeError(name)

    def __str__(self):
        # "*" marks a selected item, parentheses a disabled one.
        text = self.name
        if self.selected:
            text = "*" + text
        if self.disabled:
            text = "(%s)" % text
        return text

    def __repr__(self):
        # XXX appending the attrs without distinguishing them from name and id
        # is silly
        pairs = [("name", self.name), ("id", self.id)]
        pairs = pairs + list(self.attrs.items())
        described = " ".join(["%s=%r" % (key, value) for key, value in pairs])
        return "<%s %s>" % (self.__class__.__name__, described)
1560
def disambiguate(items, nr, **kwds):
    """Pick one of items, using the index nr if it is given.

    items: sequence of candidates
    nr: index into items, or None to require that there is exactly one
     candidate
    kwds: the search criteria; only used to build the error message

    Raises ItemNotFoundError if items is empty or has no element at index
    nr, and AmbiguityError if nr is None and there are several candidates.

    """
    description = " ".join(["%s=%r" % pair for pair in kwds.items()])
    if not items:
        raise ItemNotFoundError(description)
    index = nr
    if index is None:
        if len(items) > 1:
            raise AmbiguityError(description)
        index = 0
    if index >= len(items):
        raise ItemNotFoundError(description)
    return items[index]
1575
1576 class ListControl(Control):
1577 """Control representing a sequence of items.
1578
1579 The value attribute of a ListControl represents the successful list items
1580 in the control. The successful list items are those that are selected and
1581 not disabled.
1582
1583 ListControl implements both list controls that take a length-1 value
1584 (single-selection) and those that take length >1 values
1585 (multiple-selection).
1586
1587 ListControls accept sequence values only. Some controls only accept
1588 sequences of length 0 or 1 (RADIO, and single-selection SELECT).
1589 In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
1590 and multiple-selection SELECTs (those having the "multiple" HTML attribute)
1591 accept sequences of any length.
1592
1593 Note the following mistake:
1594
1595 control.value = some_value
1596 assert control.value == some_value # not necessarily true
1597
1598 The reason for this is that the value attribute always gives the list items
1599 in the order they were listed in the HTML.
1600
1601 ListControl items can also be referred to by their labels instead of names.
1602 Use the label argument to .get(), and the .set_value_by_label(),
1603 .get_value_by_label() methods.
1604
1605 Note that, rather confusingly, though SELECT controls are represented in
1606 HTML by SELECT elements (which contain OPTION elements, representing
1607 individual list items), CHECKBOXes and RADIOs are not represented by *any*
1608 element. Instead, those controls are represented by a collection of INPUT
1609 elements. For example, this is a SELECT control, named "control1":
1610
1611 <select name="control1">
1612 <option>foo</option>
1613 <option value="1">bar</option>
1614 </select>
1615
1616 and this is a CHECKBOX control, named "control2":
1617
1618 <input type="checkbox" name="control2" value="foo" id="cbe1">
1619 <input type="checkbox" name="control2" value="bar" id="cbe2">
1620
1621 The id attribute of a CHECKBOX or RADIO ListControl is always that of its
1622 first element (for example, "cbe1" above).
1623
1624
1625 Additional read-only public attribute: multiple.
1626
1627 """
1628
1629 # ListControls are built up by the parser from their component items by
1630 # creating one ListControl per item, consolidating them into a single
1631 # master ListControl held by the HTMLForm:
1632
1633 # -User calls form.new_control(...)
1634 # -Form creates Control, and calls control.add_to_form(self).
1635 # -Control looks for a Control with the same name and type in the form,
1636 # and if it finds one, merges itself with that control by calling
1637 # control.merge_control(self). The first Control added to the form, of
1638 # a particular name and type, is the only one that survives in the
1639 # form.
1640 # -Form calls control.fixup for all its controls. ListControls in the
1641 # form know they can now safely pick their default values.
1642
1643 # To create a ListControl without an HTMLForm, use:
1644
1645 # control.merge_control(new_control)
1646
1647 # (actually, it's much easier just to use ParseFile)
1648
1649 _label = None
1650
1651 def __init__(self, type, name, attrs={}, select_default=False,
1652 called_as_base_class=False, index=None):
1653 """
1654 select_default: for RADIO and multiple-selection SELECT controls, pick
1655 the first item as the default if no 'selected' HTML attribute is
1656 present
1657
1658 """
1659 if not called_as_base_class:
1660 raise NotImplementedError()
1661
1662 self.__dict__["type"] = type.lower()
1663 self.__dict__["name"] = name
1664 self._value = attrs.get("value")
1665 self.disabled = False
1666 self.readonly = False
1667 self.id = attrs.get("id")
1668 self._closed = False
1669
1670 # As Controls are merged in with .merge_control(), self.attrs will
1671 # refer to each Control in turn -- always the most recently merged
1672 # control. Each merged-in Control instance corresponds to a single
1673 # list item: see ListControl.__doc__.
1674 self.items = []
1675 self._form = None
1676
1677 self._select_default = select_default
1678 self._clicked = False
1679
1680 def clear(self):
1681 self.value = []
1682
1683 def is_of_kind(self, kind):
1684 if kind == "list":
1685 return True
1686 elif kind == "multilist":
1687 return bool(self.multiple)
1688 elif kind == "singlelist":
1689 return not self.multiple
1690 else:
1691 return False
1692
1693 def get_items(self, name=None, label=None, id=None,
1694 exclude_disabled=False):
1695 """Return matching items by name or label.
1696
1697 For argument docs, see the docstring for .get()
1698
1699 """
1700 if name is not None and not isstringlike(name):
1701 raise TypeError("item name must be string-like")
1702 if label is not None and not isstringlike(label):
1703 raise TypeError("item label must be string-like")
1704 if id is not None and not isstringlike(id):
1705 raise TypeError("item id must be string-like")
1706 items = [] # order is important
1707 compat = self._form.backwards_compat
1708 for o in self.items:
1709 if exclude_disabled and o.disabled:
1710 continue
1711 if name is not None and o.name != name:
1712 continue
1713 if label is not None:
1714 for l in o.get_labels():
1715 if ((compat and l.text == label) or
1716 (not compat and l.text.find(label) > -1)):
1717 break
1718 else:
1719 continue
1720 if id is not None and o.id != id:
1721 continue
1722 items.append(o)
1723 return items
1724
1725 def get(self, name=None, label=None, id=None, nr=None,
1726 exclude_disabled=False):
1727 """Return item by name or label, disambiguating if necessary with nr.
1728
1729 All arguments must be passed by name, with the exception of 'name',
1730 which may be used as a positional argument.
1731
1732 If name is specified, then the item must have the indicated name.
1733
1734 If label is specified, then the item must have a label whose
1735 whitespace-compressed, stripped, text substring-matches the indicated
1736 label string (e.g. label="please choose" will match
1737 " Do please choose an item ").
1738
1739 If id is specified, then the item must have the indicated id.
1740
1741 nr is an optional 0-based index of the items matching the query.
1742
1743 If nr is the default None value and more than item is found, raises
1744 AmbiguityError (unless the HTMLForm instance's backwards_compat
1745 attribute is true).
1746
1747 If no item is found, or if items are found but nr is specified and not
1748 found, raises ItemNotFoundError.
1749
1750 Optionally excludes disabled items.
1751
1752 """
1753 if nr is None and self._form.backwards_compat:
1754 nr = 0 # :-/
1755 items = self.get_items(name, label, id, exclude_disabled)
1756 return disambiguate(items, nr, name=name, label=label, id=id)
1757
1758 def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
1759 # strictly for use by deprecated methods
1760 if by_label:
1761 name, label = None, name
1762 else:
1763 name, label = name, None
1764 return self.get(name, label, nr, exclude_disabled)
1765
1766 def toggle(self, name, by_label=False, nr=None):
1767 """Deprecated: given a name or label and optional disambiguating index
1768 nr, toggle the matching item's selection.
1769
1770 Selecting items follows the behavior described in the docstring of the
1771 'get' method.
1772
1773 if the item is disabled, or this control is disabled or readonly,
1774 raise AttributeError.
1775
1776 """
1777 deprecation(
1778 "item = control.get(...); item.selected = not item.selected")
1779 o = self._get(name, by_label, nr)
1780 self._set_selected_state(o, not o.selected)
1781
1782 def set(self, selected, name, by_label=False, nr=None):
1783 """Deprecated: given a name or label and optional disambiguating index
1784 nr, set the matching item's selection to the bool value of selected.
1785
1786 Selecting items follows the behavior described in the docstring of the
1787 'get' method.
1788
1789 if the item is disabled, or this control is disabled or readonly,
1790 raise AttributeError.
1791
1792 """
1793 deprecation(
1794 "control.get(...).selected = <boolean>")
1795 self._set_selected_state(self._get(name, by_label, nr), selected)
1796
1797 def _set_selected_state(self, item, action):
1798 # action:
1799 # bool False: off
1800 # bool True: on
1801 if self.disabled:
1802 raise AttributeError("control '%s' is disabled" % self.name)
1803 if self.readonly:
1804 raise AttributeError("control '%s' is readonly" % self.name)
1805 action == bool(action)
1806 compat = self._form.backwards_compat
1807 if not compat and item.disabled:
1808 raise AttributeError("item is disabled")
1809 else:
1810 if compat and item.disabled and action:
1811 raise AttributeError("item is disabled")
1812 if self.multiple:
1813 item.__dict__["_selected"] = action
1814 else:
1815 if not action:
1816 item.__dict__["_selected"] = False
1817 else:
1818 for o in self.items:
1819 o.__dict__["_selected"] = False
1820 item.__dict__["_selected"] = True
1821
1822 def toggle_single(self, by_label=None):
1823 """Deprecated: toggle the selection of the single item in this control.
1824
1825 Raises ItemCountError if the control does not contain only one item.
1826
1827 by_label argument is ignored, and included only for backwards
1828 compatibility.
1829
1830 """
1831 deprecation(
1832 "control.items[0].selected = not control.items[0].selected")
1833 if len(self.items) != 1:
1834 raise ItemCountError(
1835 "'%s' is not a single-item control" % self.name)
1836 item = self.items[0]
1837 self._set_selected_state(item, not item.selected)
1838
1839 def set_single(self, selected, by_label=None):
1840 """Deprecated: set the selection of the single item in this control.
1841
1842 Raises ItemCountError if the control does not contain only one item.
1843
1844 by_label argument is ignored, and included only for backwards
1845 compatibility.
1846
1847 """
1848 deprecation(
1849 "control.items[0].selected = <boolean>")
1850 if len(self.items) != 1:
1851 raise ItemCountError(
1852 "'%s' is not a single-item control" % self.name)
1853 self._set_selected_state(self.items[0], selected)
1854
1855 def get_item_disabled(self, name, by_label=False, nr=None):
1856 """Get disabled state of named list item in a ListControl."""
1857 deprecation(
1858 "control.get(...).disabled")
1859 return self._get(name, by_label, nr).disabled
1860
1861 def set_item_disabled(self, disabled, name, by_label=False, nr=None):
1862 """Set disabled state of named list item in a ListControl.
1863
1864 disabled: boolean disabled state
1865
1866 """
1867 deprecation(
1868 "control.get(...).disabled = <boolean>")
1869 self._get(name, by_label, nr).disabled = disabled
1870
1871 def set_all_items_disabled(self, disabled):
1872 """Set disabled state of all list items in a ListControl.
1873
1874 disabled: boolean disabled state
1875
1876 """
1877 for o in self.items:
1878 o.disabled = disabled
1879
1880 def get_item_attrs(self, name, by_label=False, nr=None):
1881 """Return dictionary of HTML attributes for a single ListControl item.
1882
1883 The HTML element types that describe list items are: OPTION for SELECT
1884 controls, INPUT for the rest. These elements have HTML attributes that
1885 you may occasionally want to know about -- for example, the "alt" HTML
1886 attribute gives a text string describing the item (graphical browsers
1887 usually display this as a tooltip).
1888
1889 The returned dictionary maps HTML attribute names to values. The names
1890 and values are taken from the original HTML.
1891
1892 """
1893 deprecation(
1894 "control.get(...).attrs")
1895 return self._get(name, by_label, nr).attrs
1896
1897 def close_control(self):
1898 self._closed = True
1899
1900 def add_to_form(self, form):
1901 assert self._form is None or form == self._form, (
1902 "can't add control to more than one form")
1903 self._form = form
1904 if self.name is None:
1905 # always count nameless elements as separate controls
1906 Control.add_to_form(self, form)
1907 else:
1908 for ii in range(len(form.controls)-1, -1, -1):
1909 control = form.controls[ii]
1910 if control.name == self.name and control.type == self.type:
1911 if control._closed:
1912 Control.add_to_form(self, form)
1913 else:
1914 control.merge_control(self)
1915 break
1916 else:
1917 Control.add_to_form(self, form)
1918
1919 def merge_control(self, control):
1920 assert bool(control.multiple) == bool(self.multiple)
1921 # usually, isinstance(control, self.__class__)
1922 self.items.extend(control.items)
1923
def fixup(self):
    """Finish construction once every list item has been added.

    ListControls are built up from component list items (which are also
    ListControls) during parsing.  Call this after all items have been
    added.  See ListControl.__doc__ for the reason this is required.

    """
    # Background on default selection when the HTML marks no item as
    # selected (the defaulting itself is not done in this method):
    #
    # CHECKBOX:
    #    Nothing should be selected.
    # SELECT/single, SELECT/multiple and RADIO:
    #    RFC 1866 (HTML 2.0) says the first item should be selected.
    #    The W3C HTML 4.01 Specification says client behaviour is
    #    undefined here.  For RADIO, exactly one must be selected, though
    #    which one is undefined.
    #    Both Netscape and Microsoft Internet Explorer (IE) choose the
    #    first item for SELECT/single.  However, both IE5 and Mozilla
    #    (both 1.0 and Firebird 0.6) leave all items unselected for RADIO
    #    and SELECT/multiple.
    #
    # Since Netscape and IE both choose the first item for SELECT/single,
    # we do the same.  OTOH, both leave SELECT/multiple with nothing
    # selected, in violation of RFC 1866 (but not of the W3C HTML 4
    # standard); the same is true of RADIO (which *is* in violation of the
    # HTML 4 standard).  We follow RFC 1866 if the _select_default
    # attribute is set, and Netscape and IE otherwise.  RFC 1866 and
    # HTML 4 are always violated insofar as you can deselect all items in
    # a RadioControl.

    # Now that merging is done, point every item at this control.  The
    # write goes through __dict__ rather than normal attribute assignment.
    for item in self.items:
        item.__dict__["_control"] = self
1959
1960 def __getattr__(self, name):
1961 if name == "value":
1962 compat = self._form.backwards_compat
1963 if self.name is None:
1964 return []
1965 return [o.name for o in self.items if o.selected and
1966 (not o.disabled or compat)]
1967 else:
1968 raise AttributeError("%s instance has no attribute '%s'" %
1969 (self.__class__.__name__, name))
1970
def __setattr__(self, name, value):
    """Route attribute assignment for the control.

    "name", "type" and "multiple" are read-only and raise AttributeError.
    "value" is handed to self._set_value(), unless the control is disabled
    or readonly, in which case AttributeError is raised.  Every other
    attribute is stored directly in the instance __dict__.

    """
    if name in ("name", "type", "multiple"):
        raise AttributeError("%s attribute is readonly" % name)
    if name != "value":
        self.__dict__[name] = value
        return
    if self.disabled:
        raise AttributeError("control '%s' is disabled" % self.name)
    if self.readonly:
        raise AttributeError("control '%s' is readonly" % self.name)
    self._set_value(value)
1982
def _set_value(self, value):
    """Set the selection from a sequence of item names.

    value: a sequence; None or a string-like object raises TypeError.
        An empty sequence deselects every item (disabled items are left
        alone unless the form's backwards_compat flag is true).

    Multiple-selection controls delegate to _multiple_set_value; single
    selection controls accept at most one name (ItemCountError otherwise)
    and delegate to _single_set_value.

    """
    if value is None or isstringlike(value):
        raise TypeError("ListControl, must set a sequence")
    if not value:
        compat = self._form.backwards_compat
        for item in self.items:
            if not item.disabled or compat:
                item.selected = False
        return
    if self.multiple:
        self._multiple_set_value(value)
        return
    if len(value) > 1:
        raise ItemCountError(
            "single selection list, must set sequence of "
            "length 0 or 1")
    self._single_set_value(value)
1999
def _get_items(self, name, target=1):
    """Split the non-disabled items called name into (selected, unselected).

    name: item name handed to self.get_items()
    target: minimum number of non-disabled items required

    Returns a pair of lists (on, off), each preserving the order given by
    get_items().

    Raises ItemNotFoundError if fewer than target items have that name at
    all, and AttributeError if enough exist but too few are non-disabled.

    """
    named = self.get_items(name)
    usable = [item for item in named if not item.disabled]
    if len(usable) < target:
        if len(named) >= target:
            raise AttributeError(
                "insufficient non-disabled items with name %s" % name)
        raise ItemNotFoundError(
            "insufficient items with name %r" % name)
    on = []
    off = []
    for item in usable:
        bucket = off
        if item.selected:
            bucket = on
        bucket.append(item)
    return on, off
2018
def _single_set_value(self, value):
    """Select the one item named by the length-1 sequence value.

    Nothing is changed if a matching non-disabled item is already
    selected; otherwise the first unselected match is selected.

    """
    assert len(value) == 1
    selected, unselected = self._get_items(value[0])
    assert len(selected) <= 1
    if selected:
        return
    unselected[0].selected = True
2025
def _multiple_set_value(self, value):
    """Make the selection match the item names in the sequence value.

    A name listed k times requires k non-disabled items of that name
    (checked by _get_items, which raises otherwise).  All lookups are done
    before any item is modified, so a failed lookup leaves the selection
    as it was.  Currently selected items that are not needed are
    deselected; disabled ones are only touched when the form's
    backwards_compat flag is true.

    """
    compat = self._form.backwards_compat
    to_select = []  # transactional-ish
    to_deselect = [item for item in self.items if
                   item.selected and (not item.disabled or compat)]
    # how many items of each name were requested
    wanted = {}
    for item_name in value:
        wanted[item_name] = wanted.get(item_name, 0) + 1
    for item_name, count in wanted.items():
        on, off = self._get_items(item_name, count)
        for _ in range(count):
            if on:
                # already selected: keep it by striking it off the list
                to_deselect.remove(on.pop(0))
            else:
                to_select.append(off.pop(0))
    for item in to_deselect:
        item.selected = False
    for item in to_select:
        item.selected = True
2052
def set_value_by_label(self, value):
    """Set the value of control by item labels.

    value is expected to be an iterable of strings that are substrings of
    the item labels that should be selected.  Before substring matching is
    performed, the original label text is whitespace-compressed
    (consecutive whitespace characters are converted to a single space
    character) and leading and trailing whitespace is stripped.  Ambiguous
    labels are accepted without complaint if the form's backwards_compat is
    True; otherwise, it will not complain as long as all ambiguous labels
    share the same item name (e.g. OPTION value).

    Raises TypeError for a string-like value, ItemCountError when more
    than one label is given for a single-selection control,
    AmbiguityError for a label matching items with different names (when
    not in backwards_compat mode) and ItemNotFoundError when no unused
    matching item is left for a label.

    """
    if isstringlike(value):
        raise TypeError(value)
    if not self.multiple and len(value) > 1:
        raise ItemCountError(
            "single selection list, must set sequence of "
            "length 0 or 1")
    form = self._form
    chosen = []
    for label in value:
        candidates = self.get_items(label=label)
        if len(candidates) > 1:
            if form.backwards_compat:
                # OK, we'll guess :-(  Assume first available item.
                candidates = candidates[:1]
            else:
                # ambiguous labels are fine as long as item names (e.g.
                # OPTION values) are same
                first_name = candidates[0].name
                for other in candidates[1:]:
                    if other.name != first_name:
                        raise AmbiguityError(label)
        for item in candidates:
            # For the multiple-item case, we could try to be smarter,
            # saving them up and trying to resolve, but that's too much.
            if form.backwards_compat or item not in chosen:
                chosen.append(item)
                break
        else:  # all of them are used
            raise ItemNotFoundError(label)
    # chosen now holds every item that should be on: turn everything off
    # and then switch those back on.
    self.value = []
    for item in chosen:
        item.selected = True
2098
def get_value_by_label(self):
    """Return the value of the control as given by normalized labels.

    One entry is produced per selected item (disabled items are skipped
    unless the form's backwards_compat flag is true): the text of the
    item's first label with non-empty text, or None if it has none.

    """
    compat = self._form.backwards_compat
    texts = []
    for item in self.items:
        if not ((not item.disabled or compat) and item.selected):
            continue
        text = None
        for label in item.get_labels():
            if label.text:
                text = label.text
                break
        texts.append(text)
    return texts
2112
def possible_items(self, by_label=False):
    """Deprecated: return the names or labels of all possible items.

    Includes disabled items, which may be misleading for some use cases.

    by_label: if true, return for each item the text of its first label
        with non-empty text (None when there is none) instead of the
        item name.

    """
    deprecation(
        "[item.name for item in self.items]")
    if not by_label:
        return [item.name for item in self.items]
    texts = []
    for item in self.items:
        text = None
        for label in item.get_labels():
            if label.text:
                text = label.text
                break
        texts.append(text)
    return texts
2132
def _totally_ordered_pairs(self):
    """Return (index, control name, item name) triples for successful items.

    A disabled or nameless control contributes nothing.  Otherwise one
    triple is produced for each item that is selected and not disabled.

    """
    if self.disabled or self.name is None:
        return []
    pairs = []
    for item in self.items:
        if item.selected and not item.disabled:
            pairs.append((item._index, self.name, item.name))
    return pairs
2139
def __str__(self):
    """Return a one-line summary: class name, control name, items, flags.

    A nameless control is shown as "<None>".  "disabled" and/or "readonly"
    are appended in parentheses when the corresponding flag is set.

    """
    shown_name = self.name
    if shown_name is None:
        shown_name = "<None>"

    item_texts = ", ".join([str(item) for item in self.items])

    flags = []
    if self.disabled:
        flags.append("disabled")
    if self.readonly:
        flags.append("readonly")
    suffix = ", ".join(flags)
    if suffix:
        suffix = " (%s)" % suffix

    return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
                                shown_name, item_texts, suffix)
2154
2155
class RadioControl(ListControl):
    """
    Covers:

    INPUT/RADIO

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        # a radio button without a value attribute defaults to "on"
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # written via __dict__: "multiple" cannot be assigned normally
        self.__dict__["multiple"] = False
        item = Item(self, attrs, index)
        item.__dict__["_selected"] = "checked" in attrs

    def fixup(self):
        """Settle the default selection after all items have been merged.

        If several non-disabled items are selected, only the last one is
        kept.  If none is, the first non-disabled item is selected, but
        only when _select_default is set.

        """
        ListControl.fixup(self)
        chosen = [item for item in self.items
                  if item.selected and not item.disabled]
        if chosen:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for item in chosen[:-1]:
                item.selected = False
            return
        if not self._select_default:
            return
        for item in self.items:
            if not item.disabled:
                item.selected = True
                break

    def get_labels(self):
        """Return an empty list: the control as a whole has no labels."""
        return []
2188
class CheckboxControl(ListControl):
    """
    Covers:

    INPUT/CHECKBOX

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        # a checkbox without a value attribute defaults to "on"
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # written via __dict__: "multiple" cannot be assigned normally
        self.__dict__["multiple"] = True
        item = Item(self, attrs, index)
        item.__dict__["_selected"] = "checked" in attrs

    def get_labels(self):
        """Return an empty list: the control as a whole has no labels."""
        return []
2206
2207
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)


    OPTION 'values', in HTML parlance, are Item 'names' in mechanize parlance.

    SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    names (the OPTION values) are "0", "1" and "2000" respectively.  Note that
    the value of the last OPTION in this example defaults to its contents, as
    specified by RFC 1866, as do the labels of the second and third OPTIONs.

    The OPTION labels are sometimes more meaningful than the OPTION values,
    which can make for more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element.  Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element.  control.get(...).attrs may be used as usual to get at the HTML
    attributes of the HTML elements corresponding to individual list items (for
    SELECT controls, these are OPTION elements).

    Another special case is that the Item.attrs dictionaries have a special key
    "contents" which does not correspond to any real HTML attribute, but rather
    contains the contents of the OPTION element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual.  However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.

    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Build a SELECT control holding zero or one OPTION item.

        attrs: OPTION HTML attributes plus the "__select" key (a dictionary
            of the SELECT element's attributes) and, optionally, the
            "contents" key.  The caller's dictionary is not modified.

        An Item is created only when attrs has a "value" key; otherwise
        this control is just the 'select started' marker.

        """
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        self.__dict__["_label"] = _get_label(self.attrs)
        self.__dict__["id"] = self.attrs.get("id")
        self.__dict__["multiple"] = "multiple" in self.attrs
        # the majority of the contents, label, and value dance already happened
        contents = attrs.get("contents")
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, self.attrs, select_default,
                             called_as_base_class=True, index=index)
        self.disabled = "disabled" in self.attrs
        self.readonly = "readonly" in self.attrs
        if "value" in attrs:
            # otherwise it is a marker 'select started' token
            o = Item(self, attrs, index)
            o.__dict__["_selected"] = "selected" in attrs
            # add 'label' label and contents label, if different.  If both are
            # provided, the 'label' label is used for display in HTML
            # 4.0-compliant browsers (and any lower spec? not sure) while the
            # contents are used for display in older or less-compliant
            # browsers.  We make label objects for both, if the values are
            # different.
            label = attrs.get("label")
            if label:
                o._labels.append(Label({"__text": label}))
                if contents and contents != label:
                    o._labels.append(Label({"__text": contents}))
            elif contents:
                o._labels.append(Label({"__text": contents}))

    def fixup(self):
        """Settle the default selection after all OPTIONs have been merged.

        With nothing selected, the first non-disabled item is selected for
        single-selection controls (and for multiple-selection ones when
        _select_default is set).  With several items selected in a
        single-selection control, only the last one is kept.

        """
        ListControl.fixup(self)
        # Firefox doesn't exclude disabled items from those considered here
        # (i.e. from 'found', for both branches of the if below).  Note that
        # IE6 doesn't support the disabled attribute on OPTIONs at all.
        found = [o for o in self.items if o.selected]
        if not found:
            if not self.multiple or self._select_default:
                for o in self.items:
                    if not o.disabled:
                        # Lift the control's own disabled flag just for the
                        # duration of the assignment, then put it back on
                        # the control.  (The flag used to be written to the
                        # item instead, which left a disabled SELECT enabled
                        # and disabled its first OPTION.)
                        was_disabled = self.disabled
                        self.disabled = False
                        try:
                            o.selected = True
                        finally:
                            self.disabled = was_disabled
                        break
        elif not self.multiple:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False
2320
2321
2322 #---------------------------------------------------
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit".  HTML spec. doesn't seem
        # to define this.
        if self.value is None:
            self.value = ""
        self.readonly = True

    def get_labels(self):
        """Return the control's labels, led by one built from its value.

        A Label made from a non-empty value comes first, followed by
        whatever ScalarControl.get_labels returns.

        """
        res = []
        if self.value:
            res.append(Label({"__text": self.value}))
        res.extend(ScalarControl.get_labels(self))
        return res

    def is_of_kind(self, kind):
        """Return true only for the kind "clickable"."""
        return kind == "clickable"

    def _click(self, form, coord, return_type, request_class=_request.Request):
        """Click this control and return what form._switch_click produces.

        _clicked holds coord only for the duration of the call.  It is
        reset in a finally clause so that a failure inside _switch_click
        cannot leave the control marked as clicked, which would make it
        successful in later, unrelated submissions.

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type, request_class)
        finally:
            self._clicked = False

    def _totally_ordered_pairs(self):
        """Contribute pairs only while the control is being clicked."""
        if not self._clicked:
            return []
        return ScalarControl._totally_ordered_pairs(self)
2358
2359
2360 #---------------------------------------------------
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    Coordinates are specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs, index=None):
        SubmitControl.__init__(self, type, name, attrs, index)
        # SubmitControl.__init__ sets readonly to True; undo that here
        self.readonly = False

    def _totally_ordered_pairs(self):
        """Return the click coordinates (and value, if any) as pairs.

        Nothing is returned for a disabled, unclicked or nameless control.
        Otherwise "<name>.x" and "<name>.y" carry the clicked coordinates
        as strings, followed by (name, value) when the value is non-empty.

        """
        coord = self._clicked
        if self.disabled or not coord or self.name is None:
            return []
        name = self.name
        index = self._index
        pairs = [
            (index, "%s.x" % name, str(coord[0])),
            (index+1, "%s.y" % name, str(coord[1])),
            ]
        if self._value:
            pairs.append((index+2, name, self._value))
        return pairs

    # skip SubmitControl's value-derived label
    get_labels = ScalarControl.get_labels
2390
# Aliases that add no behaviour of their own: they exist only so that
# str(control) and str(form) show a more specific class name.
class PasswordControl(TextControl):
    pass


class HiddenControl(TextControl):
    pass


class TextareaControl(TextControl):
    pass


class SubmitButtonControl(SubmitControl):
    pass
2396
2397
def is_listcontrol(control):
    """Return whatever control.is_of_kind("list") reports."""
    return control.is_of_kind("list")
2399
2400
2401 class HTMLForm:
2402 """Represents a single HTML <form> ... </form> element.
2403
2404 A form consists of a sequence of controls that usually have names, and
2405 which can take on various values. The values of the various types of
2406 controls represent variously: text, zero-or-one-of-many or many-of-many
2407 choices, and files to be uploaded. Some controls can be clicked on to
2408 submit the form, and clickable controls' values sometimes include the
2409 coordinates of the click.
2410
2411 Forms can be filled in with data to be returned to the server, and then
2412 submitted, using the click method to generate a request object suitable for
2413 passing to mechanize.urlopen (or the click_request_data or click_pairs
2414 methods for integration with third-party code).
2415
2416 import mechanize
2417 forms = mechanize.ParseFile(html, base_uri)
2418 form = forms[0]
2419
2420 form["query"] = "Python"
2421 form.find_control("nr_results").get("lots").selected = True
2422
2423 response = mechanize.urlopen(form.click())
2424
2425 Usually, HTMLForm instances are not created directly. Instead, the
2426 ParseFile or ParseResponse factory functions are used. If you do construct
2427 HTMLForm objects yourself, however, note that an HTMLForm instance is only
2428 properly initialised after the fixup method has been called (ParseFile and
2429 ParseResponse do this for you). See ListControl.__doc__ for the reason
2430 this is required.
2431
2432 Indexing a form (form["control_name"]) returns the named Control's value
2433 attribute. Assignment to a form index (form["control_name"] = something)
2434 is equivalent to assignment to the named Control's value attribute. If you
2435 need to be more specific than just supplying the control's name, use the
2436 set_value and get_value methods.
2437
2438 ListControl values are lists of item names (specifically, the names of the
2439 items that are selected and not disabled, and hence are "successful" -- ie.
2440 cause data to be returned to the server). The list item's name is the
2441 value of the corresponding HTML element's "value" attribute.
2442
2443 Example:
2444
2445 <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
2446 <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
2447
2448 defines a CHECKBOX control with name "cheeses" which has two items, named
2449 "leicester" and "cheddar".
2450
2451 Another example:
2452
2453 <SELECT name="more_cheeses">
2454 <OPTION>1</OPTION>
2455 <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
2456 </SELECT>
2457
2458 defines a SELECT control with name "more_cheeses" which has two items,
2459 named "1" and "2" (because the OPTION element's value HTML attribute
2460 defaults to the element contents -- see SelectControl.__doc__ for more on
2461 these defaulting rules).
2462
2463 To select, deselect or otherwise manipulate individual list items, use the
2464 HTMLForm.find_control() and ListControl.get() methods. To set the whole
2465 value, do as for any other control: use indexing or the set_/get_value
2466 methods.
2467
2468 Example:
2469
2470 # select *only* the item named "cheddar"
2471 form["cheeses"] = ["cheddar"]
2472 # select "cheddar", leave other items unaffected
2473 form.find_control("cheeses").get("cheddar").selected = True
2474
2475 Some controls (RADIO and SELECT without the multiple attribute) can only
2476 have zero or one items selected at a time. Some controls (CHECKBOX and
2477 SELECT with the multiple attribute) can have multiple items selected at a
2478 time. To set the whole value of a ListControl, assign a sequence to a form
2479 index:
2480
2481 form["cheeses"] = ["cheddar", "leicester"]
2482
2483 If the ListControl is not multiple-selection, the assigned list must be of
2484 length one.
2485
2486 To check if a control has an item, if an item is selected, or if an item is
2487 successful (selected and not disabled), respectively:
2488
2489 "cheddar" in [item.name for item in form.find_control("cheeses").items]
2490 "cheddar" in [item.name for item in form.find_control("cheeses").items if
2491 item.selected]
2492 "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
2493
2494 Note that some list items may be disabled (see below).
2495
2496 Note the following mistake:
2497
2498 form[control_name] = control_value
2499 assert form[control_name] == control_value # not necessarily true
2500
2501 The reason for this is that form[control_name] always gives the list items
2502 in the order they were listed in the HTML.
2503
2504 List items (hence list values, too) can be referred to in terms of list
2505 item labels rather than list item names using the appropriate label
2506 arguments. Note that each item may have several labels.
2507
2508 The question of default values of OPTION contents, labels and values is
2509 somewhat complicated: see SelectControl.__doc__ and
2510 ListControl.get_item_attrs.__doc__ if you think you need to know.
2511
2512 Controls can be disabled or readonly. In either case, the control's value
2513 cannot be changed until you clear those flags (see example below).
2514 Disabled is the state typically represented by browsers by 'greying out' a
2515 control. Disabled controls are not 'successful' -- they don't cause data
2516 to get returned to the server. Readonly controls usually appear in
2517 browsers as read-only text boxes. Readonly controls are successful. List
2518 items can also be disabled. Attempts to select or deselect disabled items
2519 fail with AttributeError.
2520
2521 If a lot of controls are readonly, it can be useful to do this:
2522
2523 form.set_all_readonly(False)
2524
2525 To clear a control's value attribute, so that it is not successful (until a
2526 value is subsequently set):
2527
2528 form.clear("cheeses")
2529
2530 More examples:
2531
2532 control = form.find_control("cheeses")
2533 control.disabled = False
2534 control.readonly = False
2535 control.get("gruyere").disabled = True
2536 control.items[0].selected = True
2537
2538 See the various Control classes for further documentation. Many methods
2539 take name, type, kind, id, label and nr arguments to specify the control to
2540 be operated on: see HTMLForm.find_control.__doc__.
2541
2542 ControlNotFoundError (subclass of ValueError) is raised if the specified
2543 control can't be found. This includes occasions where a non-ListControl
2544 is found, but the method (set, for example) requires a ListControl.
2545 ItemNotFoundError (subclass of ValueError) is raised if a list item can't
2546 be found. ItemCountError (subclass of ValueError) is raised if an attempt
2547 is made to select more than one item and the control doesn't allow that, or
2548 set/get_single are called and the control contains more than one item.
2549 AttributeError is raised if a control or item is readonly or disabled and
2550 an attempt is made to alter its value.
2551
2552 Security note: Remember that any passwords you store in HTMLForm instances
2553 will be saved to disk in the clear if you pickle them (directly or
2554 indirectly). The simplest solution to this is to avoid pickling HTMLForm
2555 objects. You could also pickle before filling in any password, or just set
2556 the password to "" before pickling.
2557
2558
2559 Public attributes:
2560
2561 action: full (absolute URI) form action
2562 method: "GET" or "POST"
2563 enctype: form transfer encoding MIME type
2564 name: name of form (None if no name was specified)
2565 attrs: dictionary mapping original HTML form attributes to their values
2566
2567 controls: list of Control instances; do not alter this list
2568 (instead, call form.new_control to make a Control and add it to the
2569 form, or control.add_to_form if you already have a Control instance)
2570
2571
2572
2573 Methods for form filling:
2574 -------------------------
2575
2576 Most of these methods have very similar arguments. See
2577 HTMLForm.find_control.__doc__ for details of the name, type, kind, label
2578 and nr arguments.
2579
2580 def find_control(self,
2581 name=None, type=None, kind=None, id=None, predicate=None,
2582 nr=None, label=None)
2583
2584 get_value(name=None, type=None, kind=None, id=None, nr=None,
2585 by_label=False, # by_label is deprecated
2586 label=None)
2587 set_value(value,
2588 name=None, type=None, kind=None, id=None, nr=None,
2589 by_label=False, # by_label is deprecated
2590 label=None)
2591
2592 clear_all()
2593 clear(name=None, type=None, kind=None, id=None, nr=None, label=None)
2594
2595 set_all_readonly(readonly)
2596
2597
2598 Method applying only to FileControls:
2599
2600 add_file(file_object,
2601 content_type="application/octet-stream", filename=None,
2602 name=None, id=None, nr=None, label=None)
2603
2604
2605 Methods applying only to clickable controls:
2606
2607 click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
2608 click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1),
2609 label=None)
2610 click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
2611
2612 """
2613
# Maps a lower-cased control type name to the Control class used for it.
# new_control() looks types up here; a type that is missing falls back to
# IgnoreControl or TextControl there, depending on ignore_unknown.
type2class = {
    # text-like controls
    "text": TextControl,
    "password": PasswordControl,
    "hidden": HiddenControl,
    "textarea": TextareaControl,

    "isindex": IsindexControl,

    "file": FileControl,

    # plain and reset buttons are mapped to IgnoreControl
    "button": IgnoreControl,
    "buttonbutton": IgnoreControl,
    "reset": IgnoreControl,
    "resetbutton": IgnoreControl,

    # clickable controls
    "submit": SubmitControl,
    "submitbutton": SubmitButtonControl,
    "image": ImageControl,

    # list controls
    "radio": RadioControl,
    "checkbox": CheckboxControl,
    "select": SelectControl,
    }
2637
2638 #---------------------------------------------------
2639 # Initialisation. Use ParseResponse / ParseFile instead.
2640
def __init__(self, action, method="GET",
             enctype="application/x-www-form-urlencoded",
             name=None, attrs=None,
             request_class=_request.Request,
             forms=None, labels=None, id_to_labels=None,
             backwards_compat=True):
    """
    In the usual case, use ParseResponse (or ParseFile) to create new
    HTMLForm objects.

    action: full (absolute URI) form action
    method: "GET" or "POST"
    enctype: form transfer encoding MIME type
    name: name of form
    attrs: dictionary mapping original HTML form attributes to their values
        (copied; an empty dictionary is used when None is passed)
    request_class: stored as self._request_class
    forms, labels, id_to_labels: stored as-is for zope.testbrowser
    backwards_compat: stored (as a bool) through __setattr__

    """
    self.action = action
    self.method = method
    self.enctype = enctype
    self.name = name
    if attrs is None:
        self.attrs = {}
    else:
        self.attrs = attrs.copy()
    # must exist before backwards_compat is assigned: __setattr__ walks it
    self.controls = []
    self._request_class = request_class

    # these attributes are used by zope.testbrowser
    self._forms = forms  # this is a semi-public API!
    self._labels = labels  # this is a semi-public API!
    self._id_to_labels = id_to_labels  # this is a semi-public API!

    self.backwards_compat = backwards_compat  # note __setattr__

    self._urlunparse = urlparse.urlunparse
    self._urlparse = urlparse.urlparse
2678
2679 def __getattr__(self, name):
2680 if name == "backwards_compat":
2681 return self._backwards_compat
2682 return getattr(HTMLForm, name)
2683
def __setattr__(self, name, value):
    """Store attributes in __dict__, giving backwards_compat special care.

    Assigning backwards_compat stores bool(value) under
    "_backwards_compat" and pushes that flag onto every label of every
    item of every control that has an items attribute.

    """
    # yuck
    if name == "backwards_compat":
        name = "_backwards_compat"
        value = bool(value)
        for control in self.controls:
            try:
                items = control.items
            except AttributeError:
                # not a list control
                continue
            for item in items:
                for label in item.get_labels():
                    label._backwards_compat = value
    self.__dict__[name] = value
2699
def new_control(self, type, name, attrs,
                ignore_unknown=False, select_default=False, index=None):
    """Adds a new control to the form.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Note that controls representing lists of items are built up from
    controls holding only a single list item.  See ListControl.__doc__ for
    further information.

    type: type of control (see Control.__doc__ for a list)
    name: name of control
    attrs: HTML attributes of control (a copy is handed to the control)
    ignore_unknown: if true, use a dummy Control instance for controls of
     unknown type; otherwise, use a TextControl
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present (this defaulting happens when the HTMLForm.fixup method is
     called)
    index: index of corresponding element in HTML (see
     MoreFormTests.test_interspersed_controls for motivation)

    """
    type_name = type.lower()
    klass = self.type2class.get(type_name)
    if klass is None:
        if ignore_unknown:
            klass = IgnoreControl
        else:
            klass = TextControl

    own_attrs = attrs.copy()
    if issubclass(klass, ListControl):
        control = klass(type_name, name, own_attrs, select_default, index)
    else:
        control = klass(type_name, name, own_attrs, index)

    if type_name == "select" and len(attrs) == 1:
        # a single-attribute "select" starts a new SELECT element: close
        # the most recently added select control so nothing merges into it
        for earlier in self.controls[::-1]:
            if earlier.type == "select":
                earlier.close_control()
                break

    control.add_to_form(self)
    control._urlparse = self._urlparse
    control._urlunparse = self._urlunparse
2747
def fixup(self):
    """Normalise form after all controls have been added.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    This method should only be called once, after all controls have been
    added to the form.

    """
    for ctl in self.controls:
        ctl.fixup()
    # re-assign the flag to itself so that the attribute-assignment hook
    # runs again now that the controls are complete
    current = self._backwards_compat
    self.backwards_compat = current
2761
2762 #---------------------------------------------------
def __str__(self):
    """Return a multi-line summary of the form and its controls.

    The first line holds the optional form name, the method, the action
    and the enctype; one line per control follows.

    """
    prefix = ""
    if self.name:
        prefix = self.name + " "
    lines = ["%s%s %s %s" % (prefix, self.method, self.action,
                             self.enctype)]
    for control in self.controls:
        lines.append(" %s" % str(control))
    return "<%s>" % "\n".join(lines)
2771
2772 #---------------------------------------------------
2773 # Form-filling methods.
2774
def __getitem__(self, name):
    """Return the value attribute of the control found by name."""
    control = self.find_control(name)
    return control.value
def __contains__(self, name):
    """Return whether the form has a control called name.

    find_control() signals a missing control by raising
    ControlNotFoundError rather than by returning a false value, so that
    exception is translated into False here; otherwise "name in form"
    could never evaluate to False.  Other errors from find_control()
    propagate unchanged.

    """
    try:
        control = self.find_control(name)
    except ControlNotFoundError:
        return False
    return bool(control)
def __setitem__(self, name, value):
    """Assign value to the control found by name.

    An AttributeError raised by the assignment (for example for a
    disabled or readonly control) is re-raised as ValueError carrying the
    same message.

    """
    control = self.find_control(name)
    try:
        control.value = value
    except AttributeError:
        # sys.exc_info() instead of "except AttributeError, e": that
        # spelling is a syntax error on Python 3, while this one parses on
        # every Python version.
        import sys
        raise ValueError(str(sys.exc_info()[1]))
2785
def get_value(self,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False,  # by_label is deprecated
              label=None):
    """Return value of control.

    If only the name argument is supplied, equivalent to

    form[name]

    name, type, kind, id, nr, label: passed to find_control() to pick the
        control.
    by_label: deprecated; if true, return the control's
        get_value_by_label() result instead, raising NotImplementedError
        for a control without that method.

    """
    if by_label:
        deprecation("form.get_value_by_label(...)")
    c = self.find_control(name, type, kind, id, label=label, nr=nr)
    if not by_label:
        return c.value
    try:
        getter = c.get_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % c.name)
    return getter()
def set_value(self, value,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False,  # by_label is deprecated
              label=None):
    """Set value of control.

    If only name and value arguments are supplied, equivalent to

    form[name] = value

    name, type, kind, id, nr, label: passed to find_control() to pick the
        control.
    by_label: deprecated; if true, value is handed to the control's
        set_value_by_label() instead, raising NotImplementedError for a
        control without that method.

    """
    if by_label:
        # point at the setter: the message used to name
        # get_value_by_label, which is not the replacement for this call
        deprecation("form.set_value_by_label(...)")
    c = self.find_control(name, type, kind, id, label=label, nr=nr)
    if by_label:
        try:
            meth = c.set_value_by_label
        except AttributeError:
            raise NotImplementedError(
                "control '%s' does not yet support by_label" % c.name)
        else:
            meth(value)
    else:
        c.value = value
def get_value_by_label(
    self, name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Return the get_value_by_label() result of the located control.

    The control is located with find_control.

    All arguments should be passed by name.

    """
    target = self.find_control(name, type, kind, id, label=label, nr=nr)
    labels = target.get_value_by_label()
    return labels
2843
def set_value_by_label(
    self, value,
    name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Pass *value* to set_value_by_label() of the located control.

    The control is located with find_control.

    All arguments should be passed by name.

    """
    target = self.find_control(name, type, kind, id, label=label, nr=nr)
    target.set_value_by_label(value)
2854
def set_all_readonly(self, readonly):
    """Set the readonly attribute of every control to bool(readonly)."""
    flag = bool(readonly)
    for item in self.controls:
        item.readonly = flag
2858
def clear_all(self):
    """Call clear() on every control in the form, in order.

    See HTMLForm.clear.__doc__ for what clearing a control means.

    """
    for item in self.controls:
        item.clear()
2867
def clear(self,
          name=None, type=None, kind=None, id=None, nr=None, label=None):
    """Clear the value attribute of a control.

    The control is located with find_control and its clear() method is
    called.  As a result, the affected control will not be successful until
    a value is subsequently set.  AttributeError is raised on readonly
    controls.

    """
    target = self.find_control(name, type, kind, id, label=label, nr=nr)
    target.clear()
2878
2879
2880 #---------------------------------------------------
2881 # Form-filling methods applying only to ListControls.
2882
def possible_items(self,  # deprecated
                   name=None, type=None, kind=None, id=None,
                   nr=None, by_label=False, label=None):
    """Return a list of all values that the specified control can take.

    The list control is located with _find_list_control and by_label is
    forwarded to its possible_items method.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    return list_control.possible_items(by_label)
2889
def set(self, selected, item_name,  # deprecated
        name=None, type=None, kind=None, id=None, nr=None,
        by_label=False, label=None):
    """Select / deselect named list item.

    selected: boolean selected state

    The list control is located with _find_list_control; selected,
    item_name and by_label are forwarded to its set method.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set(selected, item_name, by_label)
def toggle(self, item_name,  # deprecated
           name=None, type=None, kind=None, id=None, nr=None,
           by_label=False, label=None):
    """Toggle selected state of named list item.

    The list control is located with _find_list_control; item_name and
    by_label are forwarded to its toggle method.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle(item_name, by_label)
2906
def set_single(self, selected,  # deprecated
               name=None, type=None, kind=None, id=None,
               nr=None, by_label=None, label=None):
    """Select / deselect list item in a control having only one item.

    If the control has multiple list items, ItemCountError is raised.

    This is just a convenience method, so you don't need to know the item's
    name -- the item name in these single-item controls is usually
    something meaningless like "1" or "on".

    For example, if a checkbox has a single item named "on", the following
    two calls are equivalent:

    control.toggle("on")
    control.toggle_single()

    The by_label argument is ignored and deprecated.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set_single(selected)
def toggle_single(self, name=None, type=None, kind=None, id=None,
                  nr=None, by_label=None, label=None):  # deprecated
    """Toggle selected state of list item in control having only one item.

    The rest is as for HTMLForm.set_single.__doc__.

    The by_label argument is ignored and deprecated.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle_single()
2935
2936 #---------------------------------------------------
2937 # Form-filling method applying only to FileControls.
2938
def add_file(self, file_object, content_type=None, filename=None,
             name=None, id=None, nr=None, label=None):
    """Add a file to be uploaded.

    file_object: file-like object (with read method) from which to read
     data to upload
    content_type: MIME content type of data to upload
    filename: filename to pass to server

    The control is located with find_control, restricted to type "file",
    and the three arguments above are forwarded to its add_file method.

    If filename is None, no filename is sent to the server.

    If content_type is None, the content type is guessed based on the
    filename and the data read from the file object.

    XXX
    At the moment, guessed content type is always application/octet-stream.
    Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
    plain text.

    Note the following useful HTML attributes of file upload controls (see
    HTML 4.01 spec, section 17):

    accept: comma-separated list of content types that the server will
     handle correctly; you can use this to filter out non-conforming files
    size: XXX IIRC, this is indicative of whether form wants multiple or
     single files
    maxlength: XXX hint of max content length in bytes?

    """
    file_control = self.find_control(
        name, "file", id=id, label=label, nr=nr)
    file_control.add_file(file_object, content_type, filename)
2970
2971 #---------------------------------------------------
2972 # Form submission methods, applying only to clickable controls.
2973
def click(self, name=None, type=None, id=None, nr=0, coord=(1,1),
          request_class=_request.Request,
          label=None):
    """Return request that would result from clicking on a control.

    The request object is a mechanize.Request instance, which you can pass
    to mechanize.urlopen.

    Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
    IMAGEs) can be clicked.

    Will click on the first clickable control, subject to the name, type,
    id, label and nr arguments (as for find_control).  If none of name,
    type, id or label is given, nr is left at 0 and there are no clickable
    controls, a request will be returned for the form in its current,
    un-clicked, state.

    ControlNotFoundError is raised if any of name, type, id, label or a
    non-zero nr is specified but no matching control is found.  ValueError
    is raised if the HTMLForm has an enctype attribute that is not
    recognised.

    You can optionally specify a coordinate to click at, which only makes a
    difference if you clicked on an image.

    """
    # NOTE(review): the request_class argument is accepted but not
    # forwarded; the request is always built with self._request_class.
    request_factory = self._request_class
    return self._click(
        name, type, id, label, nr, coord, "request", request_factory)
3000
def click_request_data(self,
                       name=None, type=None, id=None,
                       nr=0, coord=(1,1),
                       request_class=_request.Request,
                       label=None):
    """As for click method, but return a tuple (url, data, headers).

    You can use this data to send a request to the server.  This is useful
    if you're using httplib or urllib rather than mechanize.  Otherwise,
    use the click method.

    # Untested.  Have to subclass to add headers, I think -- so use
    # mechanize instead!
    import urllib
    url, data, hdrs = form.click_request_data()
    r = urllib.urlopen(url, data)

    # Untested.  I don't know of any reason to use httplib -- you can get
    # just as much control with mechanize.
    import httplib, urlparse
    url, data, hdrs = form.click_request_data()
    tup = urlparse.urlparse(url)
    host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
    conn = httplib.HTTPConnection(host)
    if data:
        conn.request("POST", path, data, dict(hdrs))
    else:
        conn.request("GET", path, headers=dict(hdrs))
    r = conn.getresponse()

    """
    # NOTE(review): as in click, the request_class argument is not
    # forwarded; self._request_class is passed on instead.
    request_factory = self._request_class
    return self._click(
        name, type, id, label, nr, coord, "request_data", request_factory)
3034
def click_pairs(self, name=None, type=None, id=None,
                nr=0, coord=(1,1),
                label=None):
    """As for click_request_data, but returns a list of (key, value) pairs.

    You can use this list as an argument to urllib.urlencode.  This is
    usually only useful if you're using httplib or urllib rather than
    mechanize.  It may also be useful if you want to manually tweak the
    keys and/or values, but this should not be necessary.  Otherwise, use
    the click method.

    Note that this method is only useful for forms of MIME type
    x-www-form-urlencoded.  In particular, it does not return the
    information required for file upload.  If you need file upload and are
    not using mechanize, use click_request_data.
    """
    request_factory = self._request_class
    return self._click(
        name, type, id, label, nr, coord, "pairs", request_factory)
3053
3054 #---------------------------------------------------
3055
def find_control(self,
                 name=None, type=None, kind=None, id=None,
                 predicate=None, nr=None,
                 label=None):
    """Locate and return some specific control within the form.

    At least one of the name, type, kind, id, label, predicate and nr
    arguments must be supplied (ValueError is raised otherwise).  If no
    matching control is found, ControlNotFoundError is raised.

    If name is specified, then the control must have the indicated name.

    If type is specified then the control must have the specified type (in
    addition to the types possible for <input> HTML tags: "text",
    "password", "hidden", "submit", "image", "button", "radio", "checkbox",
    "file" we also have "reset", "buttonbutton", "submitbutton",
    "resetbutton", "textarea", "select" and "isindex").

    If kind is specified, then the control must fall into the specified
    group, each of which satisfies a particular interface.  The kinds are
    "text", "list", "multilist", "singlelist", "clickable" and "file".

    If id is specified, then the control must have the indicated id.

    If predicate is specified, then the control must match that function.
    The predicate function is passed the control as its single argument,
    and should return a boolean value indicating whether the control
    matched.

    nr, if supplied, is the sequence number of the control (where 0 is the
    first).  Note that control 0 is the first control matching all the
    other arguments (if supplied); it is not necessarily the first control
    in the form.  If no nr is supplied, AmbiguityError is raised if
    multiple controls match the other arguments (unless the
    .backwards_compat attribute is true).

    If label is specified, then the control must have this label.  Note
    that radio controls and checkboxes never have labels: their items do.

    """
    for criterion in (name, type, kind, id, label, predicate, nr):
        if criterion is not None:
            break
    else:
        # Every criterion was left at None: nothing to search by.
        raise ValueError(
            "at least one argument must be supplied to specify control")
    return self._find_control(name, type, kind, id, label, predicate, nr)
3102
3103 #---------------------------------------------------
3104 # Private methods.
3105
def _find_list_control(self,
                       name=None, type=None, kind=None, id=None,
                       label=None, nr=None):
    """Locate a control as _find_control does, with is_listcontrol as the
    predicate.

    ValueError is raised if every argument is None.

    """
    for criterion in (name, type, kind, id, label, nr):
        if criterion is not None:
            break
    else:
        raise ValueError(
            "at least one argument must be supplied to specify control")

    return self._find_control(
        name, type, kind, id, label, is_listcontrol, nr)
3116
def _find_control(self, name, type, kind, id, label, predicate, nr):
    """Return the control in self.controls matching every given criterion.

    A criterion left at None is not applied.  Matching rules:

    name: compared for equality with control.name; the Missing sentinel
     matches only controls whose name is None
    type: compared for equality with control.type
    kind: control.is_of_kind(kind) must be true
    id: compared for equality with control.id
    predicate: predicate(control) must be true
    label: must occur as a substring of the text of at least one of
     control.get_labels() (an empty label is not applied)
    nr: zero-based index among the controls that satisfy all the other
     criteria

    If nr is None and self.backwards_compat is true, nr is treated as 0, so
    the first match is returned.  If nr is None otherwise, exactly one
    control must match.

    Raises TypeError for a criterion of the wrong type, ValueError for a
    negative nr, AmbiguityError when several controls match and no nr
    disambiguates, and ControlNotFoundError when nothing matches (including
    when fewer than nr+1 controls match).

    """
    if ((name is not None) and (name is not Missing) and
        not isstringlike(name)):
        raise TypeError("control name must be string-like")
    if (type is not None) and not isstringlike(type):
        raise TypeError("control type must be string-like")
    if (kind is not None) and not isstringlike(kind):
        raise TypeError("control kind must be string-like")
    if (id is not None) and not isstringlike(id):
        raise TypeError("control id must be string-like")
    if (label is not None) and not isstringlike(label):
        raise TypeError("control label must be string-like")
    if (predicate is not None) and not callable(predicate):
        raise TypeError("control predicate must be callable")
    if (nr is not None) and nr < 0:
        # Zero is accepted; only negative numbers are rejected here.
        raise ValueError("control number must be a positive integer")

    # nr is decremented while scanning, so keep the caller's value for the
    # error description.
    orig_nr = nr
    found = None
    ambiguous = False
    if nr is None and self.backwards_compat:
        nr = 0

    for control in self.controls:
        if ((name is not None and name != control.name) and
            (name is not Missing or control.name is not None)):
            continue
        if type is not None and type != control.type:
            continue
        if kind is not None and not control.is_of_kind(kind):
            continue
        if id is not None and id != control.id:
            continue
        if predicate and not predicate(control):
            continue
        if label:
            for l in control.get_labels():
                if l.text.find(label) > -1:
                    break
            else:
                continue
        if nr is not None:
            if nr == 0:
                return control  # early exit: unambiguous due to nr
            nr -= 1
            continue
        # Compare against None rather than relying on truthiness, so a
        # control that happens to evaluate false is still counted.
        if found is not None:
            ambiguous = True
            break
        found = control

    if found is not None and not ambiguous:
        return found

    description = []
    if name is not None: description.append("name %s" % repr(name))
    if type is not None: description.append("type '%s'" % type)
    if kind is not None: description.append("kind '%s'" % kind)
    if id is not None: description.append("id '%s'" % id)
    if label is not None: description.append("label '%s'" % label)
    if predicate is not None:
        description.append("predicate %s" % predicate)
    # "is not None" so that an explicit nr=0 is reported too.
    if orig_nr is not None: description.append("nr %d" % orig_nr)
    description = ", ".join(description)

    if ambiguous:
        raise AmbiguityError("more than one control matching "+description)
    # Reaching this point means nothing matched.
    raise ControlNotFoundError("no control matching "+description)
3187
def _click(self, name, type, id, label, nr, coord, return_type,
           request_class=_request.Request):
    """Click the matching "clickable" control and return the result.

    The control is looked up with _find_control (kind "clickable", no
    predicate) and its _click method is called with this form, coord,
    return_type and request_class.

    If no control is found and the caller did not ask for a particular one
    (name, type, id and label are all None and nr is 0), the form's
    un-clicked state is returned via _switch_click instead.  Otherwise the
    ControlNotFoundError propagates.

    """
    try:
        target = self._find_control(
            name, type, "clickable", id, label, None, nr)
    except ControlNotFoundError:
        explicit = [c for c in (name, type, id, label) if c is not None]
        if explicit or (nr != 0):
            # A specific control was requested and does not exist.
            raise
        # no clickable controls, but no control was explicitly requested,
        # so return state without clicking any control
        return self._switch_click(return_type, request_class)
    return target._click(self, coord, return_type, request_class)
3202
def _pairs(self):
    """Return sequence of (key, value) pairs suitable for urlencoding.

    The pairs come from _pairs_and_controls, in the order it returns them,
    with the index and control_index fields dropped.

    """
    result = []
    for _index, key, value, _control_index in self._pairs_and_controls():
        result.append((key, value))
    return result
3206
3207
def _pairs_and_controls(self):
    """Return sequence of (index, key, value, control_index)
    of totally ordered pairs suitable for urlencoding.

    Each control's _totally_ordered_pairs() supplies (index, key, value)
    triples.  control_index is the index of the control in self.controls.

    The result is ordered by index alone; entries sharing an index keep the
    order in which the controls produced them.

    """
    pairs = []
    for control_index, control in enumerate(self.controls):
        for ii, key, val in control._totally_ordered_pairs():
            pairs.append((ii, key, val, control_index))

    # stable sort by ONLY first item in tuple: a bare sort() would compare
    # whole tuples, reordering ties by key/value and comparing values that
    # may not be mutually comparable.
    pairs.sort(key=lambda entry: entry[0])

    return pairs
3224
def _request_data(self):
    """Return a tuple (url, data, headers).

    For GET the encoded pairs replace the action's query and data is None.
    For POST the action's own query is kept and the pairs are sent as the
    body, either urlencoded or as multipart/form-data.  In both cases None
    is put in the last (fragment) slot of the rebuilt URL.

    ValueError is raised for a method other than GET or POST, or for an
    enctype the method does not support.

    """
    method = self.method.upper()
    # presumably (scheme, netloc, path, parameters, query, frag), as
    # returned by urlparse.urlparse -- verify against self._urlparse
    parts = self._urlparse(self.action)
    rest = parts[:-2]
    query, _frag = parts[-2:]

    if method == "GET":
        if self.enctype != "application/x-www-form-urlencoded":
            raise ValueError(
                "unknown GET form encoding type '%s'" % self.enctype)
        encoded = urllib.urlencode(self._pairs())
        return self._urlunparse(rest + (encoded, None)), None, []

    if method != "POST":
        raise ValueError("Unknown method '%s'" % method)

    uri = self._urlunparse(rest + (query, None))
    if self.enctype == "application/x-www-form-urlencoded":
        return (uri, urllib.urlencode(self._pairs()),
                [("Content-Type", self.enctype)])
    if self.enctype != "multipart/form-data":
        raise ValueError(
            "unknown POST form encoding type '%s'" % self.enctype)

    # multipart/form-data: each control writes its own MIME part; the
    # writer is handed http_hdrs to fill in (add_to_http_hdrs=True).
    body = StringIO()
    http_hdrs = []
    writer = MimeWriter(body, http_hdrs)
    writer.startmultipartbody("form-data", add_to_http_hdrs=True,
                              prefix=0)
    for _ii, key, val, control_index in self._pairs_and_controls():
        self.controls[control_index]._write_mime_data(writer, key, val)
    writer.lastpart()
    return uri, body.getvalue(), http_hdrs
3260
def _switch_click(self, return_type, request_class=_request.Request):
    """Return the form's submission data in the shape named by return_type.

    "pairs" gives self._pairs(), "request_data" gives
    self._request_data(), and anything else gives a request_class instance
    built from the request data's URL and body with its headers added.

    """
    # This is called by HTMLForm and clickable Controls to hide switching
    # on return_type.
    if return_type == "pairs":
        return self._pairs()
    if return_type == "request_data":
        return self._request_data()

    req_data = self._request_data()
    req = request_class(req_data[0], req_data[1])
    for key, val in req_data[2]:
        adder = req.add_header
        if key.lower() == "content-type":
            # Use add_unredirected_header for Content-Type when the request
            # class has it; otherwise (pre-2.4 and not using ClientCookie)
            # fall back to add_header.
            adder = getattr(req, "add_unredirected_header", adder)
        adder(key, val)
    return req
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698