| OLD | NEW |
| (Empty) | |
| 1 """HTML form handling for web clients. |
| 2 |
| 3 HTML form handling for web clients: useful for parsing HTML forms, filling them |
| 4 in and returning the completed forms to the server. This code developed from a |
| 5 port of Gisle Aas' Perl module HTML::Form, from the libwww-perl library, but |
| 6 the interface is not the same. |
| 7 |
| 8 The most useful docstring is the one for HTMLForm. |
| 9 |
| 10 RFC 1866: HTML 2.0 |
| 11 RFC 1867: Form-based File Upload in HTML |
| 12 RFC 2388: Returning Values from Forms: multipart/form-data |
| 13 HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX) |
| 14 HTML 4.01 Specification, W3C Recommendation 24 December 1999 |
| 15 |
| 16 |
| 17 Copyright 2002-2007 John J. Lee <jjl@pobox.com> |
| 18 Copyright 2005 Gary Poster |
| 19 Copyright 2005 Zope Corporation |
| 20 Copyright 1998-2000 Gisle Aas. |
| 21 |
| 22 This code is free software; you can redistribute it and/or modify it |
| 23 under the terms of the BSD or ZPL 2.1 licenses (see the file |
| 24 COPYING.txt included with the distribution). |
| 25 |
| 26 """ |
| 27 |
| 28 # TODO: |
| 29 # Clean up post the merge into mechanize |
| 30 # * Remove code that was duplicated in ClientForm and mechanize |
| 31 # * Remove weird import stuff |
| 32 # * Remove pre-Python 2.4 compatibility cruft |
| 33 # * Clean up tests |
| 34 # * Later release: Remove the ClientForm 0.1 backwards-compatibility switch |
| 35 # Remove parser testing hack |
| 36 # Clean action URI |
| 37 # Switch to unicode throughout |
| 38 # See Wichert Akkerman's 2004-01-22 message to c.l.py. |
| 39 # Apply recommendations from google code project CURLIES |
| 40 # Apply recommendations from HTML 5 spec |
| 41 # Add charset parameter to Content-type headers? How to find value?? |
| 42 # Functional tests to add: |
| 43 # Single and multiple file upload |
| 44 # File upload with missing name (check standards) |
| 45 # mailto: submission & enctype text/plain?? |
| 46 |
| 47 # Replace by_label etc. with moniker / selector concept. Allows, e.g., a |
| 48 # choice between selection by value / id / label / element contents. Or |
| 49 # choice between matching labels exactly or by substring. etc. |
| 50 |
| 51 |
# Names exported by "from <this module> import *".
# NOTE(review): ParseString, RobustFormParser and NestingRobustFormParser are
# defined in this file but are not listed here -- confirm that is intended.
__all__ = ['AmbiguityError', 'CheckboxControl', 'Control',
           'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm',
           'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl',
           'Item', 'ItemCountError', 'ItemNotFoundError', 'Label',
           'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile',
           'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl',
           'RadioControl', 'ScalarControl', 'SelectControl',
           'SubmitButtonControl', 'SubmitControl', 'TextControl',
           'TextareaControl', 'XHTMLCompatibleFormParser']
| 61 |
| 62 import HTMLParser |
| 63 from cStringIO import StringIO |
| 64 import inspect |
| 65 import logging |
| 66 import random |
| 67 import re |
| 68 import sys |
| 69 import urllib |
| 70 import urlparse |
| 71 import warnings |
| 72 |
| 73 import _beautifulsoup |
| 74 import _request |
| 75 |
| 76 # from Python itself, for backwards compatibility of raised exceptions |
| 77 import sgmllib |
| 78 # bundled copy of sgmllib |
| 79 import _sgmllib_copy |
| 80 |
| 81 |
VERSION = "0.2.11"

CHUNK = 1024  # size of chunks fed to parser, in bytes

# Default value of the "encoding" argument of unescape() and of the parser
# classes and Parse* functions in this module.
DEFAULT_ENCODING = "latin-1"

_logger = logging.getLogger("mechanize.forms")
# While this is true, debug() returns immediately without inspecting the
# stack or logging anything.  _show_debug_messages() sets it to False.
OPTIMIZATION_HACK = True
| 90 |
def debug(msg, *args, **kwds):
    """Log msg at DEBUG level, prefixed with the name of the calling function.

    msg and args are a logging-style format string and its arguments; kwds
    are passed through to the logger.  Does nothing at all while
    OPTIMIZATION_HACK is true.
    """
    if not OPTIMIZATION_HACK:
        # frame 1 is our caller; item 3 of a stack record is the function name
        caller = inspect.stack()[1][3]
        _logger.debug('%%s %s' % msg, *((caller,) + args), **kwds)
| 99 |
def _show_debug_messages():
    """Enable debug() and send this module's DEBUG log records to stdout."""
    global OPTIMIZATION_HACK
    OPTIMIZATION_HACK = False

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.DEBUG)

    _logger.setLevel(logging.DEBUG)
    _logger.addHandler(stdout_handler)
| 107 |
| 108 |
def deprecation(message, stack_offset=0):
    """Issue a DeprecationWarning carrying message.

    With the default stack_offset of 0 the warning is attributed to the
    caller of the function that called deprecation(); each extra unit of
    stack_offset moves the attribution one frame further up the stack.
    """
    level = 3 + stack_offset
    warnings.warn(message, category=DeprecationWarning, stacklevel=level)
| 111 |
| 112 |
class Missing:
    """Marker class with no state or behaviour of its own.

    NOTE(review): presumably used as a "no value supplied" sentinel elsewhere
    in the module -- confirm against its users.
    """
| 114 |
_compress_re = re.compile(r"\s+")


def compress_text(text):
    """Return text stripped at both ends, with each internal run of
    whitespace replaced by a single space."""
    stripped = text.strip()
    return _compress_re.sub(" ", stripped)
| 117 |
def normalize_line_endings(text):
    """Return text with every line ending written as CRLF.

    A lone LF (one not preceded by CR) and a lone CR (one not followed by
    LF) are each replaced by CRLF; existing CRLF pairs are left alone.
    """
    lone_lf_or_cr = r"(?:(?<!\r)\n)|(?:\r(?!\n))"
    crlf = "\r\n"
    return re.sub(lone_lf_or_cr, crlf, text)
| 120 |
| 121 |
def unescape(data, entities, encoding=DEFAULT_ENCODING):
    """Replace entity and numeric character references found in data.

    data: text that may contain references such as "&amp;" or "&#38;";
     None, and text containing no "&", is returned unchanged
    entities: mapping from the full reference text (for example "&amp;") to
     its replacement
    encoding: codec name used to encode replacements that are not plain
     strings, and passed on to unescape_charref() for numeric references

    A reference is left in the text exactly as written when it is not in
    entities, when its replacement cannot be encoded, or when a numeric
    reference is malformed (for example "&#zz;").
    """
    if data is None or "&" not in data:
        return data

    def replace_entities(match, entities=entities, encoding=encoding):
        ent = match.group()
        if ent[1] == "#":
            try:
                return unescape_charref(ent[2:-1], encoding)
            except (ValueError, OverflowError):
                # The pattern below also matches things like "&#zz;" that are
                # not numbers at all; treat them as ordinary text rather than
                # letting the conversion error abort the whole parse.
                return ent

        repl = entities.get(ent)
        if repl is None:
            # unknown entity: keep the original text
            return ent
        if not isinstance(repl, str):
            try:
                repl = repl.encode(encoding)
            except UnicodeError:
                repl = ent
        return repl

    return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
| 144 |
def unescape_charref(data, encoding):
    """Convert the body of a numeric character reference to a character.

    data: the reference without its leading "&#" and trailing ";", for
     example "38" (decimal) or "x26" / "X26" (hexadecimal)
    encoding: codec name used to encode the resulting character; if None
     the character is returned unencoded

    Returns the reference text "&#<data>;" unchanged when data is not a
    valid number, does not name a valid code point, or the character cannot
    be represented in encoding.
    """
    name, base = data, 10
    # HTML accepts both "&#x41;" and "&#X41;"
    if name[:1] in ("x", "X"):
        name, base = name[1:], 16
    try:
        uc = unichr(int(name, base))
    except (ValueError, OverflowError):
        # not a number (e.g. "zz") or outside the range of code points
        return "&#%s;" % data
    if encoding is None:
        return uc
    try:
        return uc.encode(encoding)
    except UnicodeError:
        return "&#%s;" % data
| 158 |
def get_entitydefs():
    """Return a dict mapping "&name;" to the character for each HTML entity
    known to the htmlentitydefs module."""
    import htmlentitydefs
    from codecs import latin_1_decode

    table = {}
    codepoints = getattr(htmlentitydefs, "name2codepoint", None)
    if codepoints is not None:
        for name, codepoint in codepoints.items():
            table["&%s;" % name] = unichr(codepoint)
        return table

    # Fallback for an htmlentitydefs without name2codepoint: each entitydefs
    # value is either a latin-1 character or a "&#NNN;" character reference.
    for name, char in htmlentitydefs.entitydefs.items():
        uc = latin_1_decode(char)[0]
        if uc.startswith("&#") and uc.endswith(";"):
            uc = unescape_charref(uc[2:-1], None)
        table["&%s;" % name] = uc
    return table
| 176 |
| 177 |
def issequence(x):
    """Return True if x supports indexing with 0.

    An IndexError (for example from an empty list) still counts as
    indexable; a TypeError or KeyError does not.
    """
    try:
        x[0]
    except (TypeError, KeyError):
        return False
    except IndexError:
        return True
    else:
        return True
| 186 |
def isstringlike(x):
    """Return True if x can be concatenated with a string (x + "" works).

    Any ordinary exception raised by the concatenation means "not
    string-like".  SystemExit and KeyboardInterrupt are not swallowed.
    """
    try:
        x + ""
    except Exception:
        return False
    else:
        return True
| 191 |
| 192 |
def choose_boundary():
    """Return a string usable as a multipart boundary."""
    # follow IE and firefox: 27 dashes, then three random non-negative
    # integers written in decimal and run together
    digits = []
    for _ in range(3):
        digits.append(str(random.randint(0, sys.maxint - 1)))
    return "-" * 27 + "".join(digits)
| 198 |
| 199 # This cut-n-pasted MimeWriter from standard library is here so can add |
| 200 # to HTTP headers rather than message body when appropriate. It also uses |
| 201 # \r\n in place of \n. This is a bit nasty. |
class MimeWriter:

    """Generic MIME writer.

    Methods:

    __init__()
    addheader()
    flushheaders()
    startbody()
    startmultipartbody()
    nextpart()
    lastpart()

    A MIME writer is much more primitive than a MIME parser.  It
    doesn't seek around on the output file, and it doesn't use large
    amounts of buffer space, so you have to write the parts in the
    order they should occur on the output file.  It does buffer the
    headers you add, allowing you to rearrange their order.

    General usage is:

    f = <open the output file>
    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    followed by either:

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or:

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter's methods to create the subpart...
    w.lastpart()

    The subwriter is another MimeWriter instance, and should be
    treated in the same way as the toplevel MimeWriter.  This way,
    writing recursive body parts is easy.

    Warning: don't forget to call lastpart()!

    XXX There should be more state so calls made in the wrong order
    are detected.

    Some special cases:

    - startbody() just returns the file passed to the constructor;
      but don't use this knowledge, as it may be changed.

    - startmultipartbody() actually returns a file as well;
      this can be used to write the initial 'if you can read this your
      mailer is not MIME-aware' message.

    - If you call flushheaders(), the headers accumulated so far are
      written out (and forgotten); this is useful if you don't need a
      body part at all, e.g. for a subpart of type message/rfc822
      that's (mis)used to store some header-like information.

    - Passing a keyword argument 'prefix=<flag>' to addheader(),
      start*body() affects where the header is inserted; 0 means
      append at the end, 1 means insert at the start; default is
      append for addheader(), but insert for start*body(), which use
      it to determine where the Content-type header goes.

    - Passing a true 'add_to_http_hdrs' to addheader() or start*body()
      appends the header, as a (name, value) pair, to the http_hdrs list
      given to the constructor instead of writing it to the file.

    """

    def __init__(self, fp, http_hdrs=None):
        """
        fp: object with write() and writelines() methods that receives the
         output
        http_hdrs: list that receives (name, value) pairs for headers added
         with add_to_http_hdrs true; required only if that flag is used
        """
        self._http_hdrs = http_hdrs
        self._fp = fp
        self._headers = []
        self._boundary = []
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer one header, or append it to the HTTP header list.

        prefix is ignored if add_to_http_hdrs is true.
        """
        lines = value.split("\r\n")
        # drop empty leading and trailing lines
        while lines and not lines[-1]: del lines[-1]
        while lines and not lines[0]: del lines[0]
        if add_to_http_hdrs:
            value = "".join(lines)
            # 2.2 urllib2 doesn't normalize header case
            self._http_hdrs.append((key.capitalize(), value))
        else:
            # continuation lines are indented so the header stays folded
            for i in range(1, len(lines)):
                lines[i] = " " + lines[i].strip()
            value = "\r\n".join(lines) + "\r\n"
            line = key.title() + ": " + value
            if prefix:
                self._headers.insert(0, line)
            else:
                self._headers.append(line)

    def flushheaders(self):
        """Write the buffered headers to the file and forget them."""
        self._fp.writelines(self._headers)
        self._headers = []

    def startbody(self, ctype=None, plist=None, prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Emit the Content-Type header (if any), flush headers and return
        the file to write the body to.

        ctype: content type; no Content-Type header is added if it is empty
         or if content_type is false
        plist: sequence of (name, value) parameters appended to ctype
         (default: none)

        prefix is ignored if add_to_http_hdrs is true.
        """
        if content_type and ctype:
            for name, value in plist or ():
                ctype = ctype + ';\r\n %s=%s' % (name, value)
            self.addheader("Content-Type", ctype, prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        # the blank line ending the header block is only needed when the
        # headers went to the file rather than to the HTTP header list
        if not add_to_http_hdrs: self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=None, prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body and return the output file.

        boundary: boundary string; one is generated with choose_boundary()
         if not given
        plist: extra (name, value) content-type parameters (default: none)
        """
        boundary = boundary or choose_boundary()
        self._boundary.append(boundary)
        return self.startbody("multipart/" + subtype,
                              [("boundary", boundary)] + list(plist or ()),
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the boundary that opens the next part and return a new
        writer (same class, same file) for that part."""
        boundary = self._boundary[-1]
        if self._first_part:
            self._first_part = False
        else:
            self._fp.write("\r\n")
        self._fp.write("--" + boundary + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing boundary of the current multipart body."""
        if self._first_part:
            # no part was written: emit an opening boundary first
            self.nextpart()
        boundary = self._boundary.pop()
        self._fp.write("\r\n--" + boundary + "--\r\n")
| 344 |
| 345 |
class LocateError(ValueError):
    """Common base class of AmbiguityError, ControlNotFoundError and
    ItemNotFoundError; itself a ValueError."""


class AmbiguityError(LocateError):
    """LocateError subclass; see ParseResponse.__doc__ for when ambiguous
    label searches raise it."""


class ControlNotFoundError(LocateError):
    """LocateError subclass for control lookups."""


class ItemNotFoundError(LocateError):
    """LocateError subclass for item lookups."""


class ItemCountError(ValueError):
    """ValueError subclass; deliberately not a LocateError."""
| 352 |
| 353 # for backwards compatibility, ParseError derives from exceptions that were |
| 354 # raised by versions of ClientForm <= 0.2.5 |
| 355 # TODO: move to _html |
class ParseError(sgmllib.SGMLParseError,
                 HTMLParser.HTMLParseError):
    """Error raised by the parsers in this module when parsing fails.

    It derives from both sgmllib.SGMLParseError and
    HTMLParser.HTMLParseError, so code catching either of those also
    catches ParseError.
    """

    def __init__(self, *args, **kwds):
        # Deliberately bypass the constructors of both base classes and
        # initialise as a plain Exception (presumably because the bases
        # expect different arguments -- their code is not visible here).
        Exception.__init__(self, *args, **kwds)

    def __str__(self):
        # Likewise, use plain Exception formatting rather than a base's.
        return Exception.__str__(self)
| 364 |
| 365 |
class _AbstractFormParser:
    """Collect FORM, control and LABEL data from parser callbacks.

    This class supplies the start_*/end_*/do_*/handle_* callbacks only; the
    concrete parser base class and the unescape_attr_if_required() method
    come from subclasses.

    On completion the forms attribute holds one tuple per form:
    ((name, action, method, enctype), attrs, controls), where attrs is a
    dict of the FORM element's HTML attributes and controls is a list of
    (type, name, attrs) tuples.  forms[0] collects the controls found
    outside any FORM element.  The labels attribute lists the dicts of
    LABEL elements that have a non-empty "for" attribute, and base holds
    the href of the last BASE element seen (or None).
    """
    # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        """
        entitydefs: mapping from "&name;" to replacement text; defaults to
         get_entitydefs()
        encoding: encoding passed to unescape() / unescape_charref()
        """
        if entitydefs is None:
            entitydefs = get_entitydefs()
        self._entitydefs = entitydefs
        self._encoding = encoding

        self.base = None
        self.forms = []
        self.labels = []
        self._current_label = None
        self._current_form = None
        self._select = None
        self._optgroup = None
        self._option = None
        self._textarea = None

        # forms[0] will contain all controls that are outside of any form
        # self._global_form is an alias for self.forms[0]
        self._global_form = None
        self.start_form([])
        self.end_form()
        self._current_form = self._global_form = self.forms[0]

    def do_base(self, attrs):
        """Record the href attribute of a BASE element in self.base."""
        debug("%s", attrs)
        for key, value in attrs:
            if key == "href":
                self.base = self.unescape_attr_if_required(value)

    def end_body(self):
        """Close any LABEL or FORM still open (subclasses also call this
        from close())."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is not self._global_form:
            self.end_form()

    def start_form(self, attrs):
        """Begin a new form; raises ParseError if one is already open."""
        debug("%s", attrs)
        if self._current_form is not self._global_form:
            raise ParseError("nested FORMs")
        name = None
        action = None
        enctype = "application/x-www-form-urlencoded"
        method = "GET"
        d = {}
        for key, value in attrs:
            if key == "name":
                name = self.unescape_attr_if_required(value)
            elif key == "action":
                action = self.unescape_attr_if_required(value)
            elif key == "method":
                method = self.unescape_attr_if_required(value.upper())
            elif key == "enctype":
                enctype = self.unescape_attr_if_required(value.lower())
            # every attribute, including the four above, is kept verbatim
            d[key] = self.unescape_attr_if_required(value)
        controls = []
        self._current_form = (name, action, method, enctype), d, controls

    def end_form(self):
        """Finish the open form, append it to self.forms and make the
        global form current again; raises ParseError if none is open."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is self._global_form:
            raise ParseError("end of FORM before start")
        self.forms.append(self._current_form)
        self._current_form = self._global_form

    def start_select(self, attrs):
        """Begin a SELECT and record a control entry for it."""
        debug("%s", attrs)
        if self._select is not None:
            raise ParseError("nested SELECTs")
        if self._textarea is not None:
            raise ParseError("SELECT inside TEXTAREA")
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)

        self._select = d
        self._add_label(d)

        self._append_select_control({"__select": d})

    def end_select(self):
        """Finish the open SELECT, closing a still-open OPTION first."""
        debug("")
        if self._select is None:
            raise ParseError("end of SELECT before start")

        if self._option is not None:
            self._end_option()

        self._select = None

    def start_optgroup(self, attrs):
        """Remember the attributes of the OPTGROUP being entered."""
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTGROUP outside of SELECT")
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)

        self._optgroup = d

    def end_optgroup(self):
        """Leave the current OPTGROUP."""
        debug("")
        if self._optgroup is None:
            raise ParseError("end of OPTGROUP before start")
        self._optgroup = None

    def _start_option(self, attrs):
        """Begin an OPTION, implicitly ending a previous one still open."""
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTION outside of SELECT")
        if self._option is not None:
            self._end_option()

        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)

        self._option = {}
        self._option.update(d)
        # an option inside a disabled OPTGROUP is itself disabled
        if (self._optgroup and "disabled" in self._optgroup and
            "disabled" not in self._option):
            self._option["disabled"] = None

    def _end_option(self):
        """Finish the open OPTION and record it as a select control entry."""
        debug("")
        if self._option is None:
            raise ParseError("end of OPTION before start")

        contents = self._option.get("contents", "").strip()
        self._option["contents"] = contents
        # value and label both default to the option's text
        if "value" not in self._option:
            self._option["value"] = contents
        if "label" not in self._option:
            self._option["label"] = contents
        # stuff dict of SELECT HTML attrs into a special private key
        #  (gets deleted again later)
        self._option["__select"] = self._select
        self._append_select_control(self._option)
        self._option = None

    def _append_select_control(self, attrs):
        """Append a ("select", name, attrs) entry to the current form."""
        debug("%s", attrs)
        controls = self._current_form[2]
        name = self._select.get("name")
        controls.append(("select", name, attrs))

    def start_textarea(self, attrs):
        """Begin a TEXTAREA; its text is gathered by handle_data()."""
        debug("%s", attrs)
        if self._textarea is not None:
            raise ParseError("nested TEXTAREAs")
        if self._select is not None:
            raise ParseError("TEXTAREA inside SELECT")
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        self._add_label(d)

        self._textarea = d

    def end_textarea(self):
        """Finish the open TEXTAREA and record it in the current form."""
        debug("")
        if self._textarea is None:
            raise ParseError("end of TEXTAREA before start")
        controls = self._current_form[2]
        name = self._textarea.get("name")
        controls.append(("textarea", name, self._textarea))
        self._textarea = None

    def start_label(self, attrs):
        """Begin a LABEL, implicitly ending one that is still open."""
        debug("%s", attrs)
        if self._current_label:
            self.end_label()
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        taken = bool(d.get("for"))  # empty id is invalid
        d["__text"] = ""
        d["__taken"] = taken
        if taken:
            self.labels.append(d)
        self._current_label = d

    def end_label(self):
        """Finish the open LABEL, if any."""
        debug("")
        label = self._current_label
        if label is None:
            # something is ugly in the HTML, but we're ignoring it
            return
        self._current_label = None
        # if it is staying around, it is True in all cases
        del label["__taken"]

    def _add_label(self, d):
        """Attach the open LABEL to control dict d, unless that label has
        already been claimed (by a "for" attribute or an earlier control)."""
        #debug("%s", d)
        if self._current_label is not None:
            if not self._current_label["__taken"]:
                self._current_label["__taken"] = True
                d["__label"] = self._current_label

    def handle_data(self, data):
        """Append character data to the open OPTION, TEXTAREA or LABEL.

        Data outside all three is ignored.  Empty data is ignored too.
        """
        debug("%s", data)

        if self._option is not None:
            # self._option is a dictionary of the OPTION element's HTML
            # attributes, but it has two special keys, one of which is the
            # special "contents" key contains text between OPTION tags (the
            # other is the "__select" key: see the end_option method)
            target = self._option
            key = "contents"
        elif self._textarea is not None:
            target = self._textarea
            key = "value"
            data = normalize_line_endings(data)
        # not if within option or textarea
        elif self._current_label is not None:
            target = self._current_label
            key = "__text"
        else:
            return

        if not data:
            # Nothing to add.  Returning here also avoids looking up a key
            # that has not been created yet.
            return

        if key not in target:
            # according to
            # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break
            # immediately after start tags or immediately before end tags must
            # be ignored, but real browsers only ignore a line break after a
            # start tag, so we'll do that.
            if data[0:2] == "\r\n":
                data = data[2:]
            elif data[0:1] in ["\n", "\r"]:
                data = data[1:]
            target[key] = data
        else:
            target[key] = target[key] + data

    def do_button(self, attrs):
        """Record a BUTTON element as a control of the current form."""
        debug("%s", attrs)
        d = {}
        d["type"] = "submit"  # default
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        controls = self._current_form[2]

        name = d.get("name")
        # we don't want to lose information, so use a type string that
        # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
        # e.g. type for BUTTON/RESET is "resetbutton"
        #     (type for INPUT/RESET is "reset")
        control_type = d["type"]+"button"
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_input(self, attrs):
        """Record an INPUT element as a control of the current form."""
        debug("%s", attrs)
        d = {}
        d["type"] = "text"  # default
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        controls = self._current_form[2]

        control_type = d["type"]
        name = d.get("name")
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_isindex(self, attrs):
        """Record an ISINDEX element as a control of the current form."""
        debug("%s", attrs)
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        controls = self._current_form[2]

        self._add_label(d)
        # isindex doesn't have type or name HTML attributes
        controls.append(("isindex", None, d))

    def handle_entityref(self, name):
        """Feed the replacement text of entity reference &name; to
        handle_data()."""
        #debug("%s", name)
        self.handle_data(unescape(
            '&%s;' % name, self._entitydefs, self._encoding))

    def handle_charref(self, name):
        """Feed the character for numeric reference &#name; to
        handle_data()."""
        #debug("%s", name)
        self.handle_data(unescape_charref(name, self._encoding))

    def unescape_attr(self, name):
        """Return name with entity and character references replaced."""
        #debug("%s", name)
        return unescape(name, self._entitydefs, self._encoding)

    def unescape_attrs(self, attrs):
        """Return a copy of dict attrs with every value unescaped; values
        that are themselves dict-like are processed recursively."""
        #debug("%s", attrs)
        escaped_attrs = {}
        for key, val in attrs.items():
            try:
                val.items
            except AttributeError:
                escaped_attrs[key] = self.unescape_attr(val)
            else:
                # e.g. "__select" -- yuck!
                escaped_attrs[key] = self.unescape_attrs(val)
        return escaped_attrs

    # unknown references are passed through as literal text
    def unknown_entityref(self, ref): self.handle_data("&%s;" % ref)
    def unknown_charref(self, ref): self.handle_data("&#%s;" % ref)
| 675 |
| 676 |
| 677 class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser): |
| 678 """Good for XHTML, bad for tolerance of incorrect HTML.""" |
| 679 # thanks to Michael Howitz for this! |
| 680 def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): |
| 681 HTMLParser.HTMLParser.__init__(self) |
| 682 _AbstractFormParser.__init__(self, entitydefs, encoding) |
| 683 |
| 684 def feed(self, data): |
| 685 try: |
| 686 HTMLParser.HTMLParser.feed(self, data) |
| 687 except HTMLParser.HTMLParseError, exc: |
| 688 raise ParseError(exc) |
| 689 |
| 690 def start_option(self, attrs): |
| 691 _AbstractFormParser._start_option(self, attrs) |
| 692 |
| 693 def end_option(self): |
| 694 _AbstractFormParser._end_option(self) |
| 695 |
| 696 def handle_starttag(self, tag, attrs): |
| 697 try: |
| 698 method = getattr(self, "start_" + tag) |
| 699 except AttributeError: |
| 700 try: |
| 701 method = getattr(self, "do_" + tag) |
| 702 except AttributeError: |
| 703 pass # unknown tag |
| 704 else: |
| 705 method(attrs) |
| 706 else: |
| 707 method(attrs) |
| 708 |
| 709 def handle_endtag(self, tag): |
| 710 try: |
| 711 method = getattr(self, "end_" + tag) |
| 712 except AttributeError: |
| 713 pass # unknown tag |
| 714 else: |
| 715 method() |
| 716 |
| 717 def unescape(self, name): |
| 718 # Use the entitydefs passed into constructor, not |
| 719 # HTMLParser.HTMLParser's entitydefs. |
| 720 return self.unescape_attr(name) |
| 721 |
| 722 def unescape_attr_if_required(self, name): |
| 723 return name # HTMLParser.HTMLParser already did it |
| 724 def unescape_attrs_if_required(self, attrs): |
| 725 return attrs # ditto |
| 726 |
| 727 def close(self): |
| 728 HTMLParser.HTMLParser.close(self) |
| 729 self.end_body() |
| 730 |
| 731 |
class _AbstractSgmllibParser(_AbstractFormParser):
    """Additions to _AbstractFormParser for sgmllib-style parser bases."""

    def do_option(self, attrs):
        # Only a start handler is defined for OPTION: an open option is
        # closed by the next _start_option() call or by end_select().
        _AbstractFormParser._start_option(self, attrs)

    # we override this attr to decode hex charrefs
    # NOTE(review): the pattern also accepts hex digits without the "x"
    # prefix (e.g. "&#ab;"); unescape_charref() treats such text as decimal.
    entity_or_charref = re.compile(
        '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')
    def convert_entityref(self, name):
        # resolve against the entitydefs given to the constructor
        return unescape("&%s;" % name, self._entitydefs, self._encoding)
    def convert_charref(self, name):
        return unescape_charref("%s" % name, self._encoding)
    def unescape_attr_if_required(self, name):
        return name  # sgmllib already did it
    def unescape_attrs_if_required(self, attrs):
        return attrs  # ditto
| 748 |
| 749 |
class FormParser(_AbstractSgmllibParser, _sgmllib_copy.SGMLParser):
    """Good for tolerance of incorrect HTML, bad for XHTML."""
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        # initialise the SGML parser first, then the form-collecting state
        _sgmllib_copy.SGMLParser.__init__(self)
        _AbstractFormParser.__init__(self, entitydefs, encoding)

    def feed(self, data):
        # Callers see this module's ParseError rather than the bundled
        # sgmllib's SGMLParseError.
        try:
            _sgmllib_copy.SGMLParser.feed(self, data)
        except _sgmllib_copy.SGMLParseError, exc:
            raise ParseError(exc)

    def close(self):
        # after the parser has finished, close any label/form left open
        _sgmllib_copy.SGMLParser.close(self)
        self.end_body()
| 765 |
| 766 |
class _AbstractBSFormParser(_AbstractSgmllibParser):
    """Shared implementation for the BeautifulSoup-based form parsers.

    Subclasses also inherit from a parser class and name that same class in
    bs_base_class, so the methods here can call it explicitly.
    """

    # placeholder; set by each concrete subclass
    bs_base_class = None

    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        # form-collecting state is set up before the parser base class
        _AbstractFormParser.__init__(self, entitydefs, encoding)
        self.bs_base_class.__init__(self)

    def handle_data(self, data):
        # both the form collector and the parser base class see the data
        _AbstractFormParser.handle_data(self, data)
        self.bs_base_class.handle_data(self, data)

    def feed(self, data):
        # re-raise the bundled sgmllib's error as this module's ParseError
        try:
            self.bs_base_class.feed(self, data)
        except _sgmllib_copy.SGMLParseError, exc:
            raise ParseError(exc)

    def close(self):
        # after the parser has finished, close any label/form left open
        self.bs_base_class.close(self)
        self.end_body()
| 788 |
| 789 |
class RobustFormParser(_AbstractBSFormParser, _beautifulsoup.BeautifulSoup):

    """Tries to be highly tolerant of incorrect HTML."""

    # must name the same parser class that appears in the bases above
    bs_base_class = _beautifulsoup.BeautifulSoup
| 795 |
| 796 |
class NestingRobustFormParser(_AbstractBSFormParser,
                              _beautifulsoup.ICantBelieveItsBeautifulSoup):

    """Tries to be highly tolerant of incorrect HTML.

    Different from RobustFormParser in that it more often guesses nesting
    above missing end tags (see BeautifulSoup docs).
    """

    # must name the same parser class that appears in the bases above
    bs_base_class = _beautifulsoup.ICantBelieveItsBeautifulSoup
| 807 |
| 808 |
| 809 #FormParser = XHTMLCompatibleFormParser # testing hack |
| 810 #FormParser = RobustFormParser # testing hack |
| 811 |
| 812 |
def ParseResponseEx(response,
                    select_default=False,
                    form_parser_class=FormParser,
                    request_class=_request.Request,
                    entitydefs=None,
                    encoding=DEFAULT_ENCODING,

                    # private
                    _urljoin=urlparse.urljoin,
                    _urlparse=urlparse.urlparse,
                    _urlunparse=urlparse.urlunparse,
                    ):
    """Identical to ParseResponse, except that:

    1. The returned list contains an extra item.  The first form in the list
    contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.

    response.geturl() is used as the base URI of the document.
    """
    # The two literal False values fill positional parameters of
    # _ParseFileEx -- presumably ignore_errors and backwards_compat; confirm
    # against its signature, which is defined elsewhere in this file.
    return _ParseFileEx(response, response.geturl(),
                        select_default,
                        False,
                        form_parser_class,
                        request_class,
                        entitydefs,
                        False,
                        encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
| 846 |
def ParseFileEx(file, base_uri,
                select_default=False,
                form_parser_class=FormParser,
                request_class=_request.Request,
                entitydefs=None,
                encoding=DEFAULT_ENCODING,

                # private
                _urljoin=urlparse.urljoin,
                _urlparse=urlparse.urlparse,
                _urlunparse=urlparse.urlunparse,
                ):
    """Identical to ParseFile, except that:

    1. The returned list contains an extra item.  The first form in the list
    contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.
    """
    # The two literal False values fill positional parameters of
    # _ParseFileEx -- presumably ignore_errors and backwards_compat; confirm
    # against its signature, which is defined elsewhere in this file.
    return _ParseFileEx(file, base_uri,
                        select_default,
                        False,
                        form_parser_class,
                        request_class,
                        entitydefs,
                        False,
                        encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
| 880 |
def ParseString(text, base_uri, *args, **kwds):
    """Parse HTML held in the string text.

    The text is wrapped in a StringIO and handed to ParseFileEx together
    with base_uri and any further arguments, so the return value (including
    the extra first item) is that of ParseFileEx.
    """
    fh = StringIO(text)
    return ParseFileEx(fh, base_uri, *args, **kwds)
| 884 |
def ParseResponse(response, *args, **kwds):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of mechanize.urlopen can be conveniently passed to this
    function as the response parameter.

    mechanize.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    form_parser_class: class to instantiate and use to parse
    request_class: class to return from .click() method (default is
     mechanize.Request)
    entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
     definitions (a sensible default is used)
    encoding: character encoding used for encoding numeric character references
     when matching link text.  This function does not attempt to find the
     encoding in a META HTTP-EQUIV attribute in the document itself; a caller
     that has determined the document's encoding should pass it using this
     parameter.

    backwards_compat: boolean that determines whether the returned HTMLForm
     objects are backwards-compatible with old code.  If backwards_compat is
     true:

     - ClientForm 0.1 code will continue to work as before.

     - Label searches that do not specify a nr (number or count) will always
       get the first match, even if other controls match.  If
       backwards_compat is False, label searches that have ambiguous results
       will raise an AmbiguityError.

     - Item label matching is done by strict string comparison rather than
       substring matching.

     - De-selecting individual list items is allowed even if the Item is
       disabled.

    The backwards_compat argument will be removed in a future release.

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    There is a choice of parsers.  mechanize.XHTMLCompatibleFormParser (uses
    HTMLParser.HTMLParser) works best for XHTML, mechanize.FormParser (uses
    bundled copy of sgmllib.SGMLParser) (the default) works better for ordinary
    grubby HTML.  Note that HTMLParser is only available in Python 2.2 and
    later.  You can pass your own class in here as a hack to work around bad
    HTML, but at your own risk: there is no well-defined interface.

    """
    # [1:] drops the first item of the result (see ParseResponseEx.__doc__:
    # the form holding controls that are outside any FORM element).
    return _ParseFileEx(response, response.geturl(), *args, **kwds)[1:]
| 946 |
def ParseFile(file, base_uri, *args, **kwds):
    """Parse HTML read from a file and return a list of HTMLForm instances.

    mechanize.ParseError is raised on parse errors.

    file: file-like object (supporting a read() method) containing HTML with
     zero or more forms to be parsed
    base_uri: the URI of the document (note that the base URI used to submit
     the form will be that given in the BASE element if present, not that of
     the document)

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    all_forms = _ParseFileEx(file, base_uri, *args, **kwds)
    # Everything except the first parsed form is handed back to the caller.
    return all_forms[1:]
| 962 |
def _ParseFileEx(file, base_uri,
                 select_default=False,
                 ignore_errors=False,
                 form_parser_class=FormParser,
                 request_class=_request.Request,
                 entitydefs=None,
                 backwards_compat=True,
                 encoding=DEFAULT_ENCODING,
                 _urljoin=urlparse.urljoin,
                 _urlparse=urlparse.urlparse,
                 _urlunparse=urlparse.urlunparse,
                 ):
    """Parse HTML from file and return every HTMLForm built from it.

    file: file-like object; file.read(CHUNK) is called repeatedly until it
     returns an empty result
    base_uri: URI of the document; superseded by the parser's base attribute
     when that is not None
    select_default: passed to HTMLForm.new_control for every control
    ignore_errors: accepted for interface compatibility; not used here
    form_parser_class: called as form_parser_class(entitydefs, encoding) to
     create the parser that the document is fed to
    request_class, backwards_compat: passed to the HTMLForm constructor
    _urljoin: used to resolve each form's action against the base URI
    _urlparse, _urlunparse: stored on each form as form._urlparse and
     form._urlunparse

    Returns the list of all forms, in the order the parser reported them
    (unlike ParseFile / ParseResponse, nothing is sliced off the front).

    A ParseError raised while feeding the parser has its base_uri attribute
    set to the document URI before it is re-raised.

    """
    import sys  # only needed to reach the in-flight ParseError below

    if backwards_compat:
        deprecation("operating in backwards-compatibility mode", 1)
    fp = form_parser_class(entitydefs, encoding)
    while True:
        data = file.read(CHUNK)
        try:
            fp.feed(data)
        except ParseError:
            # sys.exc_info() is used instead of binding the exception in the
            # except clause because the binding syntax differs between Python
            # versions; this spelling is accepted by all of them.
            sys.exc_info()[1].base_uri = base_uri
            raise
        # Only an empty read means end-of-file.  A file-like object may
        # legitimately return fewer than CHUNK bytes before the end, so a
        # short read must not terminate the loop (that would silently drop
        # the rest of the document).
        if not data:
            break
    fp.close()
    if fp.base is not None:
        # HTML BASE element takes precedence over document URI
        base_uri = fp.base

    labels = []
    id_to_labels = {}
    for label_attrs in fp.labels:
        label = Label(label_attrs)
        labels.append(label)
        # group labels by the id of the element they point at, keeping
        # document order within each group
        id_to_labels.setdefault(label_attrs["for"], []).append(label)

    forms = []
    for (name, action, method, enctype), attrs, controls in fp.forms:
        if action is None:
            action = base_uri
        else:
            action = _urljoin(base_uri, action)
        # would be nice to make HTMLForm class (form builder) pluggable
        form = HTMLForm(
            action, method, enctype, name, attrs, request_class,
            forms, labels, id_to_labels, backwards_compat)
        form._urlparse = _urlparse
        form._urlunparse = _urlunparse
        for position, control_spec in enumerate(controls):
            control_type, control_name, control_attrs = control_spec
            # index=position*10 allows ImageControl to return multiple
            # ordered pairs
            form.new_control(
                control_type, control_name, control_attrs,
                select_default=select_default, index=position*10)
        forms.append(form)
    # fixup only after every form exists
    for form in forms:
        form.fixup()
    return forms
| 1022 |
| 1023 |
class Label:
    """Text and attributes of a label, as collected by the form parser.

    attrs is the parser's mapping for the label; its "for" entry (if any)
    becomes the id attribute and its "__text" entry supplies the text.

    The read-only text attribute gives the stripped label text when
    _backwards_compat is true, and the compress_text()-ed form of it
    otherwise.

    """
    def __init__(self, attrs):
        self.id = attrs.get("for")
        stripped = attrs.get("__text").strip()
        self._text = stripped
        self._ctext = compress_text(stripped)
        self.attrs = attrs
        self._backwards_compat = False  # maintained by HTMLForm

    def __getattr__(self, name):
        # Only reached when normal attribute lookup has already failed.
        if name != "text":
            return getattr(Label, name)
        if self._backwards_compat:
            return self._text
        return self._ctext

    def __setattr__(self, name, value):
        if name != "text":
            self.__dict__[name] = value
            return
        # don't see any need for this, so make it read-only
        raise AttributeError("text attribute is read-only")

    def __str__(self):
        details = (self.id, self.text)
        return "<Label(id=%r, text=%r)>" % details
| 1048 |
| 1049 |
def _get_label(attrs):
    """Return a Label built from attrs["__label"], or None if there is none."""
    label_attrs = attrs.get("__label")
    if label_attrs is None:
        return None
    return Label(label_attrs)
| 1056 |
class Control:
    """An HTML form control.

    An HTMLForm contains a sequence of Controls.  The Controls in an HTMLForm
    are accessed using the HTMLForm.find_control method or the
    HTMLForm.controls attribute.

    Control instances are usually constructed using the ParseFile /
    ParseResponse functions.  If you use those functions, you can ignore the
    rest of this paragraph.  A Control is only properly initialised after the
    fixup method has been called.  In fact, this is only strictly necessary
    for ListControl instances.  This is necessary because ListControls are
    built up from ListControls each containing only a single item, and their
    initial value(s) can only be known after the sequence is complete.

    The types and values that are acceptable for assignment to the value
    attribute are defined by subclasses.

    If the disabled attribute is true, this represents the state typically
    represented by browsers by 'greying out' a control.  If the disabled
    attribute is true, the Control will raise AttributeError if an attempt is
    made to change its value.  In addition, the control will not be
    considered 'successful' as defined by the W3C HTML 4 standard -- ie. it
    will contribute no data to the return value of the HTMLForm.click*
    methods.  To enable a control, set the disabled attribute to a false
    value.

    If the readonly attribute is true, the Control will raise AttributeError
    if an attempt is made to change its value.  To make a control writable,
    set the readonly attribute to a false value.

    All controls have the disabled and readonly attributes, not only those
    that may have the HTML attributes of the same names.

    On assignment to the value attribute, the following exceptions are
    raised: TypeError, AttributeError (if the value attribute should not be
    assigned to, because the control is disabled, for example) and
    ValueError.

    If the name or value attributes are None, or the value is an empty list,
    or if the control is disabled, the control is not successful.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs, index=None):
        """
        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Remember form as this control's form and append self to it."""
        self._form = form
        form.controls.append(self)

    def fixup(self):
        """Hook called once all controls exist; does nothing by default."""
        pass

    def is_of_kind(self, kind):
        raise NotImplementedError()

    def clear(self):
        raise NotImplementedError()

    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        # drop the leading index from each ordered triple
        result = []
        for _index, key, value in self._totally_ordered_pairs():
            result.append((key, value))
        return result

    def _totally_ordered_pairs(self):
        """Return list of (index, key, value) tuples.

        Like pairs, but allows preserving correct ordering even where several
        controls are involved.

        """
        raise NotImplementedError()

    def _write_mime_data(self, mw, name, value):
        """Write data for a subitem of this control to a MimeWriter."""
        # called by HTMLForm
        part = mw.nextpart()
        disposition = 'form-data; name="%s"' % name
        part.addheader("Content-Disposition", disposition, 1)
        body = part.startbody(prefix=0)
        body.write(value)

    def __str__(self):
        raise NotImplementedError()

    def get_labels(self):
        """Return all labels (Label instances) for this control.

        If the control was surrounded by a <label> tag, that will be the first
        label; all other labels, connected by 'for' and 'id', are in the order
        that appear in the HTML.

        """
        found = []
        enclosing_label = self._label
        if enclosing_label:
            found.append(enclosing_label)
        control_id = self.id
        if control_id:
            found.extend(self._form._id_to_labels.get(control_id, ()))
        return found
| 1175 |
| 1176 |
| 1177 #--------------------------------------------------- |
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls don't accept any value attribute.  Otherwise, takes a
    single value, which must be string-like.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs, index=None):
        self._index = index
        self._label = _get_label(attrs)
        # type and name are refused by __setattr__, so write them directly
        self.__dict__["type"] = type.lower()
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        self.disabled = "disabled" in attrs
        self.readonly = "readonly" in attrs
        self.id = attrs.get("id")

        self.attrs = attrs.copy()

        self._clicked = False

        self._urlparse = urlparse.urlparse
        self._urlunparse = urlparse.urlunparse

    def __getattr__(self, name):
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name != "value":
            self.__dict__[name] = value
            return
        # assignment to value: validate the type first, then the state
        if not isstringlike(value):
            raise TypeError("must assign a string")
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        self.__dict__["_value"] = value

    def _totally_ordered_pairs(self):
        name = self.name
        value = self.value
        successful = not (name is None or value is None or self.disabled)
        if not successful:
            return []
        return [(self._index, name, value)]

    def clear(self):
        """Reset the value to None; refused for readonly controls."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self.__dict__["_value"] = None

    def __str__(self):
        name = self.name
        if name is None:
            name = "<None>"
        value = self.value
        if value is None:
            value = "<None>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        info = ""
        if flags:
            info = " (%s)" % ", ".join(flags)

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
| 1253 |
| 1254 |
| 1255 #--------------------------------------------------- |
class TextControl(ScalarControl):
    """Textual input control.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # hidden controls are always read-only
        if self.type == "hidden":
            self.readonly = True
        # a missing value attribute means the empty string, not None
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind == "text"
| 1274 |
| 1275 #--------------------------------------------------- |
class FileControl(ScalarControl):
    """File upload with INPUT TYPE=FILE.

    The value attribute of a FileControl is always None.  Use add_file
    instead.

    Additional public method: add_file

    """

    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        self._value = None
        # list of (file_object, content_type, filename) triples
        self._upload_data = []

    def is_of_kind(self, kind):
        return kind == "file"

    def clear(self):
        """Forget all added files; refused for readonly controls."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self._upload_data = []

    def __setattr__(self, name, value):
        if name in ("value", "name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value

    def add_file(self, file_object, content_type=None, filename=None):
        """Queue file_object for upload with this control.

        file_object must have a read attribute.  content_type and filename
        must each be None or string-like; a content_type of None is replaced
        by "application/octet-stream".  TypeError is raised for arguments of
        the wrong kind.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        if content_type is not None and not isstringlike(content_type):
            raise TypeError("content type must be None or string-like")
        if filename is not None and not isstringlike(filename):
            raise TypeError("filename must be None or string-like")
        if content_type is None:
            content_type = "application/octet-stream"
        upload = (file_object, content_type, filename)
        self._upload_data.append(upload)

    def _totally_ordered_pairs(self):
        # XXX should it be successful even if unnamed?
        if self.name is None or self.disabled:
            return []
        return [(self._index, self.name, "")]

    # If enctype is application/x-www-form-urlencoded and there's a FILE
    # control present, what should be sent?  Strictly, it should be 'name=data'
    # (see HTML 4.01 spec., section 17.13.2), but code sends "name=" ATM.  What
    # about multiple file upload?
    def _write_mime_data(self, mw, _name, _value):
        # called by HTMLForm
        # assert _name == self.name and _value == ''
        uploads = self._upload_data
        if len(uploads) >= 2:
            # multiple files: one form-data part holding a multipart/mixed
            # body with one "file" sub-part per upload
            outer = mw.nextpart()
            outer.addheader("Content-Disposition",
                            'form-data; name="%s"' % self.name, prefix=1)
            fh = outer.startmultipartbody("mixed", prefix=0)
            for file_object, content_type, filename in uploads:
                inner = outer.nextpart()
                if filename is None:
                    filename = ""
                fn_part = '; filename="%s"' % filename
                inner.addheader("Content-Disposition",
                                "file%s" % fn_part, prefix=1)
                body = inner.startbody(content_type, prefix=0)
                body.write(file_object.read())
            outer.lastpart()
            return

        # zero or one file: a single form-data part
        if len(uploads) == 0:
            # nothing added: send an empty, unnamed file
            file_object = StringIO()
            content_type = "application/octet-stream"
            filename = ""
        else:
            file_object, content_type, filename = uploads[0]
            if filename is None:
                filename = ""
        part = mw.nextpart()
        fn_part = '; filename="%s"' % filename
        disp = 'form-data; name="%s"%s' % (self.name, fn_part)
        part.addheader("Content-Disposition", disp, prefix=1)
        body = part.startbody(content_type, prefix=0)
        body.write(file_object.read())

    def __str__(self):
        name = self.name
        if name is None:
            name = "<None>"

        if self._upload_data:
            shown = []
            for _file_object, _ctype, filename in self._upload_data:
                if filename is None:
                    shown.append("<Unnamed file>")
                else:
                    shown.append(filename)
            value = ", ".join(shown)
        else:
            value = "<No files added>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        info = ", ".join(flags)
        if info:
            info = " (%s)" % info

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
| 1381 |
| 1382 |
| 1383 #--------------------------------------------------- |
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd-one-out of HTML form controls.  In fact, it isn't
    really part of regular HTML forms at all, and predates it.  You're only
    allowed one ISINDEX per HTML document.  ISINDEX and regular form
    submission are mutually exclusive -- either submit a form, or the
    ISINDEX.

    Having said this, since ISINDEX controls may appear in forms (which is
    probably bad HTML), ParseFile / ParseResponse will include them in the
    HTMLForm instances it returns.  You can set the ISINDEX's value, as with
    any other control (but note that ISINDEX controls have no name, so you'll
    need to use the type argument of set_value!).  When you submit the form,
    the ISINDEX will not be successful (ie., no data will get returned to the
    server as a result of its presence), unless you click on the ISINDEX
    control, in which case the ISINDEX gets submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    mechanize.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  If you want to submit one
    by hand, do it like so:

    url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
    result = mechanize.urlopen(url)

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # a missing value attribute means the empty string, not None
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind in ["text", "clickable"]

    def _totally_ordered_pairs(self):
        # never contributes data to an ordinary form submission
        return []

    def _click(self, form, coord, return_type, request_class=_request.Request):
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
        # deprecated in 4.01, but it should still say how to submit it).
        # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
        parsed = self._urlparse(form.action)
        # the action's own query and fragment are discarded
        leading, (_query, _frag) = parsed[:-2], parsed[-2:]
        quoted_value = urllib.quote_plus(self.value)
        url = self._urlunparse(leading + (quoted_value, None))

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return url, None, []
        return request_class(url)

    def __str__(self):
        value = self.value
        if value is None:
            value = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        info = ", ".join(flags)
        if info:
            info = " (%s)" % info

        return "<%s(%s)%s>" % (self.__class__.__name__, value, info)
| 1451 |
| 1452 |
| 1453 #--------------------------------------------------- |
class IgnoreControl(ScalarControl):
    """Control that we're not interested in.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    These controls are always unsuccessful, in the terminology of HTML 4 (ie.
    they never require any information to be returned to the server).

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # whatever value the HTML carried is discarded
        self._value = None

    def is_of_kind(self, kind):
        return False

    def __setattr__(self, name, value):
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value
| 1486 |
| 1487 |
| 1488 #--------------------------------------------------- |
| 1489 # ListControls |
| 1490 |
| 1491 # helpers and subsidiary classes |
| 1492 |
class Item:
    """A single item of a list control.

    The item's name is the "value" entry of attrs.  Creating an Item appends
    it to control.items.  Only the selected and disabled attributes may be
    assigned to; assigning selected is delegated to the owning control.

    """
    def __init__(self, control, attrs, index=None):
        label = _get_label(attrs)
        if label:
            own_labels = [label]
        else:
            own_labels = []
        # __setattr__ refuses almost everything, so fill __dict__ directly
        state = self.__dict__
        state["name"] = attrs["value"]
        state["_labels"] = own_labels
        state["attrs"] = attrs
        state["_control"] = control
        state["disabled"] = "disabled" in attrs
        state["_selected"] = False
        state["id"] = attrs.get("id")
        state["_index"] = index
        control.items.append(self)

    def get_labels(self):
        """Return all labels (Label instances) for this item.

        For items that represent radio buttons or checkboxes, if the item was
        surrounded by a <label> tag, that will be the first label; all other
        labels, connected by 'for' and 'id', are in the order that appear in
        the HTML.

        For items that represent select options, if the option had a label
        attribute, that will be the first label.  If the option has contents
        (text within the option tags) and it is not the same as the label
        attribute (if any), that will be a label.  There is nothing in the
        spec to my knowledge that makes an option with an id unable to be the
        target of a label's for attribute, so those are included, if any, for
        the sake of consistency and completeness.

        """
        found = list(self._labels)
        item_id = self.id
        if item_id:
            by_id = self._control._form._id_to_labels
            found.extend(by_id.get(item_id, ()))
        return found

    def __getattr__(self, name):
        if name != "selected":
            raise AttributeError(name)
        return self._selected

    def __setattr__(self, name, value):
        if name == "selected":
            self._control._set_selected_state(self, value)
            return
        if name == "disabled":
            self.__dict__["disabled"] = bool(value)
            return
        raise AttributeError(name)

    def __str__(self):
        text = self.name
        if self.selected:
            text = "*" + text
        if self.disabled:
            text = "(%s)" % text
        return text

    def __repr__(self):
        # XXX appending the attrs without distinguishing them from name and id
        # is silly
        pairs = [("name", self.name), ("id", self.id)]
        pairs.extend(self.attrs.items())
        rendered = " ".join(["%s=%r" % (k, v) for k, v in pairs])
        return "<%s %s>" % (self.__class__.__name__, rendered)
| 1560 |
def disambiguate(items, nr, **kwds):
    """Pick one entry of items, using nr to choose between several.

    With nr None, exactly one item must be present (AmbiguityError if there
    are more); otherwise nr is used as an index into items.
    ItemNotFoundError is raised if items is empty or has no more than nr
    entries.  The keyword arguments are only used to build the error
    message.

    """
    msg = " ".join(["%s=%r" % (key, value) for key, value in kwds.items()])
    if not items:
        raise ItemNotFoundError(msg)
    if nr is None:
        if len(items) > 1:
            raise AmbiguityError(msg)
        nr = 0
    if nr >= len(items):
        raise ItemNotFoundError(msg)
    return items[nr]
| 1575 |
| 1576 class ListControl(Control): |
| 1577 """Control representing a sequence of items. |
| 1578 |
| 1579 The value attribute of a ListControl represents the successful list items |
| 1580 in the control. The successful list items are those that are selected and |
| 1581 not disabled. |
| 1582 |
| 1583 ListControl implements both list controls that take a length-1 value |
| 1584 (single-selection) and those that take length >1 values |
| 1585 (multiple-selection). |
| 1586 |
| 1587 ListControls accept sequence values only. Some controls only accept |
| 1588 sequences of length 0 or 1 (RADIO, and single-selection SELECT). |
| 1589 In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes |
| 1590 and multiple-selection SELECTs (those having the "multiple" HTML attribute) |
| 1591 accept sequences of any length. |
| 1592 |
| 1593 Note the following mistake: |
| 1594 |
| 1595 control.value = some_value |
| 1596 assert control.value == some_value # not necessarily true |
| 1597 |
| 1598 The reason for this is that the value attribute always gives the list items |
| 1599 in the order they were listed in the HTML. |
| 1600 |
| 1601 ListControl items can also be referred to by their labels instead of names. |
| 1602 Use the label argument to .get(), and the .set_value_by_label(), |
| 1603 .get_value_by_label() methods. |
| 1604 |
| 1605 Note that, rather confusingly, though SELECT controls are represented in |
| 1606 HTML by SELECT elements (which contain OPTION elements, representing |
| 1607 individual list items), CHECKBOXes and RADIOs are not represented by *any* |
| 1608 element. Instead, those controls are represented by a collection of INPUT |
| 1609 elements. For example, this is a SELECT control, named "control1": |
| 1610 |
| 1611 <select name="control1"> |
| 1612 <option>foo</option> |
| 1613 <option value="1">bar</option> |
| 1614 </select> |
| 1615 |
| 1616 and this is a CHECKBOX control, named "control2": |
| 1617 |
| 1618 <input type="checkbox" name="control2" value="foo" id="cbe1"> |
| 1619 <input type="checkbox" name="control2" value="bar" id="cbe2"> |
| 1620 |
| 1621 The id attribute of a CHECKBOX or RADIO ListControl is always that of its |
| 1622 first element (for example, "cbe1" above). |
| 1623 |
| 1624 |
| 1625 Additional read-only public attribute: multiple. |
| 1626 |
| 1627 """ |
| 1628 |
| 1629 # ListControls are built up by the parser from their component items by |
| 1630 # creating one ListControl per item, consolidating them into a single |
| 1631 # master ListControl held by the HTMLForm: |
| 1632 |
| 1633 # -User calls form.new_control(...) |
| 1634 # -Form creates Control, and calls control.add_to_form(self). |
| 1635 # -Control looks for a Control with the same name and type in the form, |
| 1636 # and if it finds one, merges itself with that control by calling |
| 1637 # control.merge_control(self). The first Control added to the form, of |
| 1638 # a particular name and type, is the only one that survives in the |
| 1639 # form. |
| 1640 # -Form calls control.fixup for all its controls. ListControls in the |
| 1641 # form know they can now safely pick their default values. |
| 1642 |
| 1643 # To create a ListControl without an HTMLForm, use: |
| 1644 |
| 1645 # control.merge_control(new_control) |
| 1646 |
| 1647 # (actually, it's much easier just to use ParseFile) |
| 1648 |
| 1649 _label = None |
| 1650 |
def __init__(self, type, name, attrs={}, select_default=False,
             called_as_base_class=False, index=None):
    """
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present
    called_as_base_class: must be true; NotImplementedError is raised
     otherwise

    """
    if not called_as_base_class:
        raise NotImplementedError()

    # type and name are written straight into __dict__ rather than assigned
    self.__dict__["type"] = type.lower()
    self.__dict__["name"] = name
    self._value = attrs.get("value")
    self.disabled = False
    self.readonly = False
    self.id = attrs.get("id")
    self._closed = False

    # As Controls are merged in with .merge_control(), self.attrs will
    # refer to each Control in turn -- always the most recently merged
    # control.  Each merged-in Control instance corresponds to a single
    # list item: see ListControl.__doc__.
    self.items = []
    self._form = None

    self._select_default = select_default
    self._clicked = False
| 1679 |
def clear(self):
    """Deselect everything by assigning an empty list to the value."""
    self.value = []
| 1682 |
def is_of_kind(self, kind):
    """Return whether this control is of the given kind.

    "list" always matches; "multilist" and "singlelist" match according to
    the truth of the multiple attribute; nothing else matches.

    """
    if kind == "list":
        return True
    if kind == "multilist":
        return bool(self.multiple)
    if kind == "singlelist":
        return not self.multiple
    return False
| 1692 |
def get_items(self, name=None, label=None, id=None,
              exclude_disabled=False):
    """Return matching items by name or label.

    Every criterion that is not None must match.  Items are returned in the
    order they are held by the control.

    For argument docs, see the docstring for .get()

    """
    if name is not None and not isstringlike(name):
        raise TypeError("item name must be string-like")
    if label is not None and not isstringlike(label):
        raise TypeError("item label must be string-like")
    if id is not None and not isstringlike(id):
        raise TypeError("item id must be string-like")
    compat = self._form.backwards_compat
    matches = []  # order is important
    for item in self.items:
        if exclude_disabled and item.disabled:
            continue
        if name is not None and item.name != name:
            continue
        if label is not None:
            # exact comparison in backwards-compatible mode, substring
            # search otherwise; one matching label is enough
            label_found = False
            for item_label in item.get_labels():
                text = item_label.text
                if compat:
                    label_found = (text == label)
                else:
                    label_found = (text.find(label) > -1)
                if label_found:
                    break
            if not label_found:
                continue
        if id is not None and item.id != id:
            continue
        matches.append(item)
    return matches
| 1724 |
def get(self, name=None, label=None, id=None, nr=None,
        exclude_disabled=False):
    """Return item by name or label, disambiguating if necessary with nr.

    All arguments must be passed by name, with the exception of 'name',
    which may be used as a positional argument.

    If name is specified, then the item must have the indicated name.

    If label is specified, then the item must have a label whose
    whitespace-compressed, stripped, text substring-matches the indicated
    label string (e.g. label="please choose" will match
    " Do  please  choose an item ").

    If id is specified, then the item must have the indicated id.

    nr is an optional 0-based index of the items matching the query.

    If nr is the default None value and more than one item is found, raises
    AmbiguityError (unless the HTMLForm instance's backwards_compat
    attribute is true).

    If no item is found, or if items are found but nr is specified and not
    found, raises ItemNotFoundError.

    Optionally excludes disabled items.

    """
    # backwards-compatible forms silently take the first match
    take_first = nr is None and self._form.backwards_compat
    if take_first:
        nr = 0  # :-/
    candidates = self.get_items(name, label, id, exclude_disabled)
    return disambiguate(candidates, nr, name=name, label=label, id=id)
| 1757 |
def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
    """Look up a single item on behalf of the deprecated methods.

    name is treated as an item label if by_label is true, and as an item
    name otherwise.  nr and exclude_disabled are forwarded to .get(), whose
    docstring describes how items are selected and which exceptions are
    raised.

    """
    # strictly for use by deprecated methods
    if by_label:
        name, label = None, name
    else:
        label = None
    # Pass nr and exclude_disabled by keyword: .get() takes id as its third
    # positional parameter, so passing them positionally would send nr in
    # as the item id and exclude_disabled in as nr.
    return self.get(name, label, nr=nr, exclude_disabled=exclude_disabled)
| 1765 |
def toggle(self, name, by_label=False, nr=None):
    """Deprecated: flip the selection state of one matching item.

    The item is identified by name (or by label, if by_label is true) and
    the optional disambiguating index nr.  Selecting items follows the
    behavior described in the docstring of the 'get' method.

    If the item is disabled, or this control is disabled or readonly,
    raise AttributeError.

    """
    deprecation(
        "item = control.get(...); item.selected = not item.selected")
    item = self._get(name, by_label, nr)
    flipped = not item.selected
    self._set_selected_state(item, flipped)
| 1781 |
def set(self, selected, name, by_label=False, nr=None):
    """Deprecated: set the selection state of one matching item.

    The item is identified by name (or by label, if by_label is true) and
    the optional disambiguating index nr; its selection is set to the bool
    value of selected.  Selecting items follows the behavior described in
    the docstring of the 'get' method.

    If the item is disabled, or this control is disabled or readonly,
    raise AttributeError.

    """
    deprecation(
        "control.get(...).selected = <boolean>")
    item = self._get(name, by_label, nr)
    self._set_selected_state(item, selected)
| 1796 |
def _set_selected_state(self, item, action):
    """Select (action true) or deselect (action false) item.

    action is normalised to a bool before use.  For a single-selection
    control, selecting an item first deselects every item of the control.

    Raises AttributeError if this control is disabled or readonly, or if
    item is disabled -- except that when the form's backwards_compat
    attribute is true, a disabled item may still be deselected.

    """
    if self.disabled:
        raise AttributeError("control '%s' is disabled" % self.name)
    if self.readonly:
        raise AttributeError("control '%s' is readonly" % self.name)
    # Normalise to a real bool, so that exactly True or False is stored on
    # the item.  (This used to be a no-op comparison, "action == bool(...)".)
    action = bool(action)
    compat = self._form.backwards_compat
    if item.disabled and (action or not compat):
        raise AttributeError("item is disabled")
    if action and not self.multiple:
        # at most one item may be selected: clear all of them first
        for other in self.items:
            other.__dict__["_selected"] = False
    # written straight into __dict__, as before, rather than through the
    # item's public 'selected' attribute
    item.__dict__["_selected"] = action
| 1821 |
def toggle_single(self, by_label=None):
    """Deprecated: flip the selection of this control's only item.

    Raises ItemCountError if the control does not contain only one item.

    by_label argument is ignored, and included only for backwards
    compatibility.

    """
    deprecation(
        "control.items[0].selected = not control.items[0].selected")
    item_count = len(self.items)
    if item_count != 1:
        raise ItemCountError(
            "'%s' is not a single-item control" % self.name)
    only_item = self.items[0]
    self._set_selected_state(only_item, not only_item.selected)
| 1838 |
def set_single(self, selected, by_label=None):
    """Deprecated: set the selection of this control's only item.

    Raises ItemCountError if the control does not contain only one item.

    by_label argument is ignored, and included only for backwards
    compatibility.

    """
    deprecation(
        "control.items[0].selected = <boolean>")
    item_count = len(self.items)
    if item_count != 1:
        raise ItemCountError(
            "'%s' is not a single-item control" % self.name)
    only_item = self.items[0]
    self._set_selected_state(only_item, selected)
| 1854 |
def get_item_disabled(self, name, by_label=False, nr=None):
    """Deprecated: return the disabled state of one item of this control.

    The item is looked up by name (or by label, if by_label is true) and
    the optional index nr.

    """
    deprecation(
        "control.get(...).disabled")
    item = self._get(name, by_label, nr)
    return item.disabled
| 1860 |
def set_item_disabled(self, disabled, name, by_label=False, nr=None):
    """Deprecated: set the disabled state of one item of this control.

    disabled: boolean disabled state

    The item is looked up by name (or by label, if by_label is true) and
    the optional index nr.

    """
    deprecation(
        "control.get(...).disabled = <boolean>")
    item = self._get(name, by_label, nr)
    item.disabled = disabled
| 1870 |
def set_all_items_disabled(self, disabled):
    """Give every item of this control the same disabled state.

    disabled: boolean disabled state

    """
    for item in self.items:
        setattr(item, "disabled", disabled)
| 1879 |
def get_item_attrs(self, name, by_label=False, nr=None):
    """Deprecated: return the HTML attributes of a single list item.

    The HTML element types that describe list items are: OPTION for SELECT
    controls, INPUT for the rest.  These elements have HTML attributes that
    you may occasionally want to know about -- for example, the "alt" HTML
    attribute gives a text string describing the item (graphical browsers
    usually display this as a tooltip).

    The returned dictionary maps HTML attribute names to values.  The names
    and values are taken from the original HTML.

    """
    deprecation(
        "control.get(...).attrs")
    item = self._get(name, by_label, nr)
    return item.attrs
| 1896 |
def close_control(self):
    """Mark this control as closed.

    add_to_form checks this flag: a new control with the same name and type
    is merged into an existing one only while the existing one is not
    closed.

    """
    setattr(self, "_closed", True)
| 1899 |
def add_to_form(self, form):
    """Attach this control to form, merging with an open sibling if any.

    A nameless control is always added as a separate control.  Otherwise
    the form's controls are searched from last to first for one with the
    same name and type: if that control is still open (not _closed), this
    control is merged into it; if it is closed, or there is no such
    control, this control is added to the form as a new control.

    """
    assert self._form is None or form == self._form, (
        "can't add control to more than one form")
    self._form = form
    if self.name is None:
        # always count nameless elements as separate controls
        Control.add_to_form(self, form)
        return

    # find the most recently added control of the same name and type
    sibling = None
    for candidate in form.controls[::-1]:
        if candidate.name == self.name and candidate.type == self.type:
            sibling = candidate
            break

    if sibling is None or sibling._closed:
        Control.add_to_form(self, form)
    else:
        sibling.merge_control(self)
| 1918 |
def merge_control(self, control):
    """Absorb the items of control into this control.

    The two controls must agree on whether they are multiple-selection.

    """
    same_multiplicity = bool(control.multiple) == bool(self.multiple)
    assert same_multiplicity
    # usually, isinstance(control, self.__class__)
    incoming = control.items
    self.items.extend(incoming)
| 1923 |
def fixup(self):
    """
    ListControls are built up from component list items (which are also
    ListControls) during parsing.  This method should be called after all
    items have been added.  See ListControl.__doc__ for the reason this is
    required.

    This base implementation only points every item back at this control;
    any default selection is left to the subclasses' own fixup methods.

    """
    # Background on default selection, where the HTML did not mark any
    # item as selected:
    #
    # CHECKBOX:
    #   Nothing should be selected.
    # SELECT/single, SELECT/multiple and RADIO:
    #   RFC 1866 (HTML 2.0) says the first item should be selected.
    #   The W3C HTML 4.01 Specification says that client behaviour is
    #   undefined in this case.  For RADIO, exactly one must be selected,
    #   though which one is undefined.
    #   Both Netscape and Microsoft Internet Explorer (IE) choose the first
    #   item for SELECT/single.  However, both IE5 and Mozilla (both 1.0
    #   and Firebird 0.6) leave all items unselected for RADIO and
    #   SELECT/multiple.
    #
    # Since Netscape and IE both choose the first item for SELECT/single,
    # we do the same.  OTOH, both leave SELECT/multiple with nothing
    # selected, in violation of RFC 1866 (but not of the W3C HTML 4
    # standard); the same is true of RADIO (which *is* in violation of the
    # HTML 4 standard).  We follow RFC 1866 if the _select_default
    # attribute is set, and Netscape and IE otherwise.  RFC 1866 and HTML 4
    # are always violated insofar as you can deselect all items in a
    # RadioControl.

    # set items' controls to self, now that we've merged
    for item in self.items:
        item.__dict__["_control"] = self
| 1959 |
| 1960 def __getattr__(self, name): |
| 1961 if name == "value": |
| 1962 compat = self._form.backwards_compat |
| 1963 if self.name is None: |
| 1964 return [] |
| 1965 return [o.name for o in self.items if o.selected and |
| 1966 (not o.disabled or compat)] |
| 1967 else: |
| 1968 raise AttributeError("%s instance has no attribute '%s'" % |
| 1969 (self.__class__.__name__, name)) |
| 1970 |
def __setattr__(self, name, value):
    """Intercept attribute assignment.

    'name', 'type' and 'multiple' cannot be assigned: AttributeError is
    raised.  Assigning 'value' is passed on to _set_value, unless the
    control is disabled or readonly, in which case AttributeError is
    raised.  Every other attribute is stored in the instance dictionary.

    """
    if name in ("name", "type", "multiple"):
        raise AttributeError("%s attribute is readonly" % name)
    if name != "value":
        self.__dict__[name] = value
        return
    if self.disabled:
        raise AttributeError("control '%s' is disabled" % self.name)
    if self.readonly:
        raise AttributeError("control '%s' is readonly" % self.name)
    self._set_value(value)
| 1982 |
def _set_value(self, value):
    """Set the control's value from a sequence of item names.

    An empty sequence deselects every item (disabled items are left alone
    unless the form's backwards_compat attribute is true).  Otherwise the
    work is passed to _multiple_set_value or _single_set_value, depending
    on whether this is a multiple-selection control.

    Raises TypeError if value is None or string-like, and ItemCountError if
    more than one name is given for a single-selection control.

    """
    if value is None or isstringlike(value):
        raise TypeError("ListControl, must set a sequence")
    if value:
        if self.multiple:
            self._multiple_set_value(value)
            return
        if len(value) > 1:
            raise ItemCountError(
                "single selection list, must set sequence of "
                "length 0 or 1")
        self._single_set_value(value)
        return
    # empty value: clear the selection
    compat = self._form.backwards_compat
    for item in self.items:
        if not item.disabled or compat:
            item.selected = False
| 1999 |
def _get_items(self, name, target=1):
    """Return (selected, unselected) lists of enabled items named name.

    target is the number of enabled items required.  If there are fewer,
    ItemNotFoundError is raised when even the disabled items would not
    make up the number, and AttributeError when enough items exist but too
    many of them are disabled.

    """
    candidates = self.get_items(name)
    usable = [item for item in candidates if not item.disabled]
    if len(usable) < target:
        if len(candidates) >= target:
            raise AttributeError(
                "insufficient non-disabled items with name %s" % name)
        raise ItemNotFoundError(
            "insufficient items with name %r" % name)
    selected = []
    unselected = []
    for item in usable:
        if item.selected:
            bucket = selected
        else:
            bucket = unselected
        bucket.append(item)
    return selected, unselected
| 2018 |
def _single_set_value(self, value):
    """Select the one item named by the single-element sequence value.

    Nothing is changed if an enabled item of that name is already
    selected; otherwise the first enabled unselected one is selected.

    """
    assert len(value) == 1
    wanted_name = value[0]
    selected, unselected = self._get_items(wanted_name)
    assert len(selected) <= 1
    if len(selected) == 0:
        first_unselected = unselected[0]
        first_unselected.selected = True
| 2025 |
def _multiple_set_value(self, value):
    """Make the selection match the sequence of item names in value.

    A name may appear in value more than once, in which case that many
    items of that name are selected.  For each name, items that are
    already selected are kept in preference to selecting further ones.
    Selected items that are not asked for are deselected (disabled ones
    only if the form's backwards_compat attribute is true).

    Nothing is changed until every name has been resolved, so an exception
    raised by _get_items leaves the selection as it was.

    """
    compat = self._form.backwards_compat
    turn_on = []  # transactional-ish
    turn_off = [item for item in self.items if
                item.selected and (not item.disabled or compat)]
    # how many items of each name were asked for
    wanted = {}
    for item_name in value:
        wanted[item_name] = wanted.get(item_name, 0) + 1
    for item_name, count in wanted.items():
        on, off = self._get_items(item_name, count)
        for _ in range(count):
            if on:
                # already selected: keep it, so take it off the "off" list
                turn_off.remove(on.pop(0))
            else:
                turn_on.append(off.pop(0))
    for item in turn_off:
        item.selected = False
    for item in turn_on:
        item.selected = True
| 2052 |
def set_value_by_label(self, value):
    """Set the value of control by item labels.

    value is expected to be an iterable of strings that are substrings of
    the item labels that should be selected.  Before substring matching is
    performed, the original label text is whitespace-compressed
    (consecutive whitespace characters are converted to a single space
    character) and leading and trailing whitespace is stripped.  Ambiguous
    labels are accepted without complaint if the form's backwards_compat is
    True; otherwise, it will not complain as long as all ambiguous labels
    share the same item name (e.g. OPTION value).

    Raises TypeError if value is string-like, ItemCountError if more than
    one label is given for a single-selection control, AmbiguityError if a
    label matches items with different names (backwards_compat false), and
    ItemNotFoundError if a label has no unused matching item.

    """
    if isstringlike(value):
        raise TypeError(value)
    if not self.multiple and len(value) > 1:
        raise ItemCountError(
            "single selection list, must set sequence of "
            "length 0 or 1")
    chosen = []
    for wanted_label in value:
        candidates = self.get_items(label=wanted_label)
        if len(candidates) > 1:
            if self._form.backwards_compat:
                # OK, we'll guess :-(  Assume first available item.
                candidates = candidates[:1]
            else:
                # ambiguous labels are fine as long as item names (e.g.
                # OPTION values) are same
                first_name = candidates[0].name
                for other in candidates[1:]:
                    if other.name != first_name:
                        raise AmbiguityError(wanted_label)
        for candidate in candidates:
            # For the multiple-item case, we could try to be smarter,
            # saving them up and trying to resolve, but that's too much.
            if self._form.backwards_compat or candidate not in chosen:
                chosen.append(candidate)
                break
        else:  # no match at all, or all of them are used
            raise ItemNotFoundError(wanted_label)
    # now we have all the items that should be on
    # let's just turn everything off and then back on.
    self.value = []
    for item in chosen:
        item.selected = True
| 2098 |
def get_value_by_label(self):
    """Return the value of the control as given by normalized labels.

    The result has one entry per selected item (disabled items are left
    out unless the form's backwards_compat attribute is true): the text of
    the item's first label with non-empty text, or None if it has none.

    """
    compat = self._form.backwards_compat

    def first_label_text(item):
        for candidate in item.get_labels():
            if candidate.text:
                return candidate.text
        return None

    return [first_label_text(item) for item in self.items
            if (not item.disabled or compat) and item.selected]
| 2112 |
def possible_items(self, by_label=False):
    """Deprecated: return the names or labels of all possible items.

    With by_label true, each entry is the text of the item's first label
    with non-empty text, or None if the item has none; otherwise each
    entry is the item's name.

    Includes disabled items, which may be misleading for some use cases.

    """
    deprecation(
        "[item.name for item in self.items]")
    if not by_label:
        return [item.name for item in self.items]

    def first_label_text(item):
        for candidate in item.get_labels():
            if candidate.text:
                return candidate.text
        return None

    return [first_label_text(item) for item in self.items]
| 2132 |
def _totally_ordered_pairs(self):
    """Return (index, control name, item name) for each successful item.

    An item counts only if it is selected and not disabled.  A disabled or
    nameless control contributes nothing.

    """
    if self.disabled or self.name is None:
        return []
    pairs = []
    for item in self.items:
        if item.selected and not item.disabled:
            pairs.append((item._index, self.name, item.name))
    return pairs
| 2139 |
def __str__(self):
    """Return e.g. "<ClassName(name=[item, item]) (disabled, readonly)>".

    A nameless control is shown with the name "<None>"; the parenthesised
    suffix appears only if the control is disabled and/or readonly.

    """
    shown_name = self.name
    if shown_name is None:
        shown_name = "<None>"

    rendered_items = ", ".join([str(item) for item in self.items])

    flags = []
    if self.disabled:
        flags.append("disabled")
    if self.readonly:
        flags.append("readonly")
    suffix = ""
    if flags:
        suffix = " (%s)" % ", ".join(flags)

    return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
                                shown_name, rendered_items, suffix)
| 2154 |
| 2155 |
class RadioControl(ListControl):
    """
    Covers:

    INPUT/RADIO

    A single-selection list control: its "multiple" attribute is False.

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Create the control for one INPUT/RADIO element.

        attrs is the dictionary of HTML attributes of the element.  Note
        that it is modified in place: a missing "value" attribute is
        filled in as "on".  An Item is created from attrs, initially
        selected if attrs has a "checked" key.

        """
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # "multiple" is refused by __setattr__, so write it directly
        self.__dict__["multiple"] = False
        # NOTE(review): the new Item is not stored here; presumably Item's
        # constructor adds itself to this control's items -- confirm.
        o = Item(self, attrs, index)
        # "in" rather than dict.has_key(), which Python 3 does not have
        o.__dict__["_selected"] = "checked" in attrs

    def fixup(self):
        """Settle the default selection once all items have been merged.

        If no enabled item is selected, the first enabled item is selected
        when _select_default is set, and nothing is selected otherwise.
        If several enabled items are selected, only the last one is kept.

        """
        ListControl.fixup(self)
        found = [o for o in self.items if o.selected and not o.disabled]
        if not found:
            if self._select_default:
                for o in self.items:
                    if not o.disabled:
                        o.selected = True
                        break
        else:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False

    def get_labels(self):
        """Return [] -- the control as a whole has no labels."""
        return []
| 2188 |
class CheckboxControl(ListControl):
    """
    Covers:

    INPUT/CHECKBOX

    A multiple-selection list control: its "multiple" attribute is True.

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Create the control for one INPUT/CHECKBOX element.

        attrs is the dictionary of HTML attributes of the element.  Note
        that it is modified in place: a missing "value" attribute is
        filled in as "on".  An Item is created from attrs, initially
        selected if attrs has a "checked" key.

        """
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # "multiple" is refused by __setattr__, so write it directly
        self.__dict__["multiple"] = True
        # NOTE(review): the new Item is not stored here; presumably Item's
        # constructor adds itself to this control's items -- confirm.
        o = Item(self, attrs, index)
        # "in" rather than dict.has_key(), which Python 3 does not have
        o.__dict__["_selected"] = "checked" in attrs

    def get_labels(self):
        """Return [] -- the control as a whole has no labels."""
        return []
| 2206 |
| 2207 |
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)


    OPTION 'values', in HTML parlance, are Item 'names' in mechanize parlance.

    SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    names (the OPTION values) are "0", "1" and "2000" respectively.  Note that
    the value of the last OPTION in this example defaults to its contents, as
    specified by RFC 1866, as do the labels of the second and third OPTIONs.

    The OPTION labels are sometimes more meaningful than the OPTION values,
    which can make for more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element.  Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element.  control.get(...).attrs may be used as usual to get at the HTML
    attributes of the HTML elements corresponding to individual list items (for
    SELECT controls, these are OPTION elements).

    Another special case is that the Item.attrs dictionaries have a special key
    "contents" which does not correspond to any real HTML attribute, but rather
    contains the contents of the OPTION element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual.  However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.

    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Create the control for a SELECT element or one of its OPTIONs.

        attrs holds the OPTION attributes, with the SELECT attributes under
        the "__select" key (see the comment above).  The caller's dictionary
        is not modified.  An Item is created only if attrs has a "value"
        key; without one, this instance just marks the start of a SELECT.

        """
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        self.__dict__["_label"] = _get_label(self.attrs)
        self.__dict__["id"] = self.attrs.get("id")
        # "multiple" is refused by __setattr__, so write it directly
        self.__dict__["multiple"] = "multiple" in self.attrs
        # the majority of the contents, label, and value dance already happened
        contents = attrs.get("contents")
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, self.attrs, select_default,
                             called_as_base_class=True, index=index)
        self.disabled = "disabled" in self.attrs
        self.readonly = "readonly" in self.attrs
        if "value" in attrs:
            # otherwise it is a marker 'select started' token
            o = Item(self, attrs, index)
            o.__dict__["_selected"] = "selected" in attrs
            # add 'label' label and contents label, if different.  If both are
            # provided, the 'label' label is used for display in HTML
            # 4.0-compliant browsers (and any lower spec? not sure) while the
            # contents are used for display in older or less-compliant
            # browsers.  We make label objects for both, if the values are
            # different.
            label = attrs.get("label")
            if label:
                o._labels.append(Label({"__text": label}))
                if contents and contents != label:
                    o._labels.append(Label({"__text": contents}))
            elif contents:
                o._labels.append(Label({"__text": contents}))

    def fixup(self):
        """Settle the default selection once all items have been merged.

        If no item is selected, the first enabled item is selected, unless
        this is a multiple-selection control and _select_default is not
        set.  If several items of a single-selection control are selected,
        only the last one is kept.

        """
        ListControl.fixup(self)
        # Firefox doesn't exclude disabled items from those considered here
        # (i.e. from 'found', for both branches of the if below).  Note that
        # IE6 doesn't support the disabled attribute on OPTIONs at all.
        found = [o for o in self.items if o.selected]
        if not found:
            if not self.multiple or self._select_default:
                for o in self.items:
                    if not o.disabled:
                        # Lift the control's own disabled flag while the
                        # default is applied (presumably because selecting
                        # an item of a disabled control is refused), then
                        # put it back.  It must be restored on the control:
                        # restoring it onto the item would leave the control
                        # enabled and could disable the item.
                        was_disabled = self.disabled
                        self.disabled = False
                        try:
                            o.selected = True
                        finally:
                            self.disabled = was_disabled
                        break
        elif not self.multiple:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False
| 2320 |
| 2321 |
| 2322 #--------------------------------------------------- |
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    The control is readonly, and contributes data to a submission only
    while it is being clicked (see _click and _totally_ordered_pairs).

    """
    def __init__(self, type, name, attrs, index=None):
        """Create the control; a missing value defaults to ""."""
        ScalarControl.__init__(self, type, name, attrs, index)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit".  HTML spec. doesn't seem
        # to define this.
        if self.value is None:
            self.value = ""
        self.readonly = True

    def get_labels(self):
        """Return the control's labels, its value (if any) first.

        A non-empty value is wrapped in a Label and placed ahead of the
        labels returned by ScalarControl.get_labels.

        """
        res = []
        if self.value:
            res.append(Label({"__text": self.value}))
        res.extend(ScalarControl.get_labels(self))
        return res

    def is_of_kind(self, kind):
        """Return true only for the kind "clickable"."""
        return kind == "clickable"

    def _click(self, form, coord, return_type, request_class=_request.Request):
        """Return what form._switch_click builds for a click on this control.

        coord is stored in _clicked for the duration of the call, and
        _clicked is reset to False afterwards -- even if _switch_click
        raises, so that a failed click does not leave the control marked
        as clicked.

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type, request_class)
        finally:
            self._clicked = False

    def _totally_ordered_pairs(self):
        """Return the control's pairs, or [] unless it is being clicked."""
        if not self._clicked:
            return []
        return ScalarControl._totally_ordered_pairs(self)
| 2358 |
| 2359 |
| 2360 #--------------------------------------------------- |
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    Coordinates are specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs, index=None):
        """Create the control; unlike SubmitControl, it is not readonly."""
        SubmitControl.__init__(self, type, name, attrs, index)
        self.readonly = False

    def _totally_ordered_pairs(self):
        """Return the pairs for a click on this image.

        Nothing is returned unless the control is enabled, named and being
        clicked.  Otherwise the result holds "<name>.x" and "<name>.y" with
        the two click coordinates as strings, followed by the control's own
        name and value if the value is non-empty.

        """
        coord = self._clicked
        if self.disabled or not coord:
            return []
        control_name = self.name
        if control_name is None:
            return []
        x_pair = (self._index, "%s.x" % control_name, str(coord[0]))
        y_pair = (self._index+1, "%s.y" % control_name, str(coord[1]))
        pairs = [x_pair, y_pair]
        own_value = self._value
        if own_value:
            pairs.append((self._index+2, control_name, own_value))
        return pairs

    # bypass SubmitControl.get_labels, which puts the value first
    get_labels = ScalarControl.get_labels
| 2390 |
# Aliases: these subclasses add nothing of their own.  They exist just to
# make str(control) and str(form) clearer.
class PasswordControl(TextControl):
    pass


class HiddenControl(TextControl):
    pass


class TextareaControl(TextControl):
    pass


class SubmitButtonControl(SubmitControl):
    pass
| 2396 |
| 2397 |
def is_listcontrol(control):
    """Return whether control reports itself as being of kind "list"."""
    answer = control.is_of_kind("list")
    return answer
| 2399 |
| 2400 |
| 2401 class HTMLForm: |
| 2402 """Represents a single HTML <form> ... </form> element. |
| 2403 |
| 2404 A form consists of a sequence of controls that usually have names, and |
| 2405 which can take on various values. The values of the various types of |
| 2406 controls represent variously: text, zero-or-one-of-many or many-of-many |
| 2407 choices, and files to be uploaded. Some controls can be clicked on to |
| 2408 submit the form, and clickable controls' values sometimes include the |
| 2409 coordinates of the click. |
| 2410 |
| 2411 Forms can be filled in with data to be returned to the server, and then |
| 2412 submitted, using the click method to generate a request object suitable for |
| 2413 passing to mechanize.urlopen (or the click_request_data or click_pairs |
| 2414 methods for integration with third-party code). |
| 2415 |
| 2416 import mechanize |
| 2417 forms = mechanize.ParseFile(html, base_uri) |
| 2418 form = forms[0] |
| 2419 |
| 2420 form["query"] = "Python" |
| 2421 form.find_control("nr_results").get("lots").selected = True |
| 2422 |
| 2423 response = mechanize.urlopen(form.click()) |
| 2424 |
| 2425 Usually, HTMLForm instances are not created directly. Instead, the |
| 2426 ParseFile or ParseResponse factory functions are used. If you do construct |
| 2427 HTMLForm objects yourself, however, note that an HTMLForm instance is only |
| 2428 properly initialised after the fixup method has been called (ParseFile and |
| 2429 ParseResponse do this for you). See ListControl.__doc__ for the reason |
| 2430 this is required. |
| 2431 |
| 2432 Indexing a form (form["control_name"]) returns the named Control's value |
| 2433 attribute. Assignment to a form index (form["control_name"] = something) |
| 2434 is equivalent to assignment to the named Control's value attribute. If you |
| 2435 need to be more specific than just supplying the control's name, use the |
| 2436 set_value and get_value methods. |
| 2437 |
| 2438 ListControl values are lists of item names (specifically, the names of the |
| 2439 items that are selected and not disabled, and hence are "successful" -- ie. |
| 2440 cause data to be returned to the server). The list item's name is the |
| 2441 value of the corresponding HTML element's "value" attribute. |
| 2442 |
| 2443 Example: |
| 2444 |
| 2445 <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT> |
| 2446 <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT> |
| 2447 |
| 2448 defines a CHECKBOX control with name "cheeses" which has two items, named |
| 2449 "leicester" and "cheddar". |
| 2450 |
| 2451 Another example: |
| 2452 |
| 2453 <SELECT name="more_cheeses"> |
| 2454 <OPTION>1</OPTION> |
| 2455 <OPTION value="2" label="CHEDDAR">cheddar</OPTION> |
| 2456 </SELECT> |
| 2457 |
| 2458 defines a SELECT control with name "more_cheeses" which has two items, |
| 2459 named "1" and "2" (because the OPTION element's value HTML attribute |
| 2460 defaults to the element contents -- see SelectControl.__doc__ for more on |
| 2461 these defaulting rules). |
| 2462 |
| 2463 To select, deselect or otherwise manipulate individual list items, use the |
| 2464 HTMLForm.find_control() and ListControl.get() methods. To set the whole |
| 2465 value, do as for any other control: use indexing or the set_/get_value |
| 2466 methods. |
| 2467 |
| 2468 Example: |
| 2469 |
| 2470 # select *only* the item named "cheddar" |
| 2471 form["cheeses"] = ["cheddar"] |
| 2472 # select "cheddar", leave other items unaffected |
| 2473 form.find_control("cheeses").get("cheddar").selected = True |
| 2474 |
| 2475 Some controls (RADIO and SELECT without the multiple attribute) can only |
| 2476 have zero or one items selected at a time. Some controls (CHECKBOX and |
| 2477 SELECT with the multiple attribute) can have multiple items selected at a |
| 2478 time. To set the whole value of a ListControl, assign a sequence to a form |
| 2479 index: |
| 2480 |
| 2481 form["cheeses"] = ["cheddar", "leicester"] |
| 2482 |
| 2483 If the ListControl is not multiple-selection, the assigned list must be of |
| 2484 length zero or one. |
| 2485 |
| 2486 To check if a control has an item, if an item is selected, or if an item is |
| 2487 successful (selected and not disabled), respectively: |
| 2488 |
| 2489 "cheddar" in [item.name for item in form.find_control("cheeses").items] |
| 2490 "cheddar" in [item.name for item in form.find_control("cheeses").items if |
| 2491 item.selected] |
| 2492 "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses")) |
| 2493 |
| 2494 Note that some list items may be disabled (see below). |
| 2495 |
| 2496 Note the following mistake: |
| 2497 |
| 2498 form[control_name] = control_value |
| 2499 assert form[control_name] == control_value # not necessarily true |
| 2500 |
| 2501 The reason for this is that form[control_name] always gives the list items |
| 2502 in the order they were listed in the HTML. |
| 2503 |
| 2504 List items (hence list values, too) can be referred to in terms of list |
| 2505 item labels rather than list item names using the appropriate label |
| 2506 arguments. Note that each item may have several labels. |
| 2507 |
| 2508 The question of default values of OPTION contents, labels and values is |
| 2509 somewhat complicated: see SelectControl.__doc__ and |
| 2510 ListControl.get_item_attrs.__doc__ if you think you need to know. |
| 2511 |
| 2512 Controls can be disabled or readonly. In either case, the control's value |
| 2513 cannot be changed until you clear those flags (see example below). |
| 2514 Disabled is the state typically represented by browsers by 'greying out' a |
| 2515 control. Disabled controls are not 'successful' -- they don't cause data |
| 2516 to get returned to the server. Readonly controls usually appear in |
| 2517 browsers as read-only text boxes. Readonly controls are successful. List |
| 2518 items can also be disabled. Attempts to select or deselect disabled items |
| 2519 fail with AttributeError. |
| 2520 |
| 2521 If a lot of controls are readonly, it can be useful to do this: |
| 2522 |
| 2523 form.set_all_readonly(False) |
| 2524 |
| 2525 To clear a control's value attribute, so that it is not successful (until a |
| 2526 value is subsequently set): |
| 2527 |
| 2528 form.clear("cheeses") |
| 2529 |
| 2530 More examples: |
| 2531 |
| 2532 control = form.find_control("cheeses") |
| 2533 control.disabled = False |
| 2534 control.readonly = False |
| 2535 control.get("gruyere").disabled = True |
| 2536 control.items[0].selected = True |
| 2537 |
| 2538 See the various Control classes for further documentation. Many methods |
| 2539 take name, type, kind, id, label and nr arguments to specify the control to |
| 2540 be operated on: see HTMLForm.find_control.__doc__. |
| 2541 |
| 2542 ControlNotFoundError (subclass of ValueError) is raised if the specified |
| 2543 control can't be found. This includes occasions where a non-ListControl |
| 2544 is found, but the method (set, for example) requires a ListControl. |
| 2545 ItemNotFoundError (subclass of ValueError) is raised if a list item can't |
| 2546 be found. ItemCountError (subclass of ValueError) is raised if an attempt |
| 2547 is made to select more than one item and the control doesn't allow that, or |
| 2548 set/get_single are called and the control contains more than one item. |
| 2549 AttributeError is raised if a control or item is readonly or disabled and |
| 2550 an attempt is made to alter its value. |
| 2551 |
| 2552 Security note: Remember that any passwords you store in HTMLForm instances |
| 2553 will be saved to disk in the clear if you pickle them (directly or |
| 2554 indirectly). The simplest solution to this is to avoid pickling HTMLForm |
| 2555 objects. You could also pickle before filling in any password, or just set |
| 2556 the password to "" before pickling. |
| 2557 |
| 2558 |
| 2559 Public attributes: |
| 2560 |
| 2561 action: full (absolute URI) form action |
| 2562 method: "GET" or "POST" |
| 2563 enctype: form transfer encoding MIME type |
| 2564 name: name of form (None if no name was specified) |
| 2565 attrs: dictionary mapping original HTML form attributes to their values |
| 2566 |
| 2567 controls: list of Control instances; do not alter this list |
| 2568 (instead, call form.new_control to make a Control and add it to the |
| 2569 form, or control.add_to_form if you already have a Control instance) |
| 2570 |
| 2571 |
| 2572 |
| 2573 Methods for form filling: |
| 2574 ------------------------- |
| 2575 |
| 2576 Most of these methods have very similar arguments. See |
| 2577 HTMLForm.find_control.__doc__ for details of the name, type, kind, label |
| 2578 and nr arguments. |
| 2579 |
| 2580 def find_control(self, |
| 2581 name=None, type=None, kind=None, id=None, predicate=None, |
| 2582 nr=None, label=None) |
| 2583 |
| 2584 get_value(name=None, type=None, kind=None, id=None, nr=None, |
| 2585 by_label=False, # by_label is deprecated |
| 2586 label=None) |
| 2587 set_value(value, |
| 2588 name=None, type=None, kind=None, id=None, nr=None, |
| 2589 by_label=False, # by_label is deprecated |
| 2590 label=None) |
| 2591 |
| 2592 clear_all() |
| 2593 clear(name=None, type=None, kind=None, id=None, nr=None, label=None) |
| 2594 |
| 2595 set_all_readonly(readonly) |
| 2596 |
| 2597 |
| 2598 Method applying only to FileControls: |
| 2599 |
| 2600 add_file(file_object, |
| 2601 content_type="application/octet-stream", filename=None, |
| 2602 name=None, id=None, nr=None, label=None) |
| 2603 |
| 2604 |
| 2605 Methods applying only to clickable controls: |
| 2606 |
| 2607 click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None) |
| 2608 click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1), |
| 2609 label=None) |
| 2610 click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None) |
| 2611 |
| 2612 """ |
| 2613 |
# Maps a control type name (as passed to new_control) to the Control class
# used to represent it.
type2class = dict(
    # plain text-like controls
    text=TextControl,
    password=PasswordControl,
    hidden=HiddenControl,
    textarea=TextareaControl,

    isindex=IsindexControl,

    file=FileControl,

    # plain and reset buttons are represented by IgnoreControl
    button=IgnoreControl,
    buttonbutton=IgnoreControl,
    reset=IgnoreControl,
    resetbutton=IgnoreControl,

    # submit-style controls
    submit=SubmitControl,
    submitbutton=SubmitButtonControl,
    image=ImageControl,

    # list-style controls
    radio=RadioControl,
    checkbox=CheckboxControl,
    select=SelectControl,
)
| 2637 |
| 2638 #--------------------------------------------------- |
| 2639 # Initialisation. Use ParseResponse / ParseFile instead. |
| 2640 |
def __init__(self, action, method="GET",
             enctype="application/x-www-form-urlencoded",
             name=None, attrs=None,
             request_class=_request.Request,
             forms=None, labels=None, id_to_labels=None,
             backwards_compat=True):
    """Set up an empty form (no controls yet).

    In the usual case, use ParseResponse (or ParseFile) to create new
    HTMLForm objects rather than calling this directly.

    action: full (absolute URI) form action
    method: "GET" or "POST"
    enctype: form transfer encoding MIME type
    name: name of form
    attrs: dictionary mapping original HTML form attributes to their
     values; a copy is stored, so the caller's dictionary is not shared
    request_class: stored as self._request_class and handed on by the
     click* methods
    forms, labels, id_to_labels: stored unchanged on self._forms,
     self._labels and self._id_to_labels
    backwards_compat: stored (as a bool) via the __setattr__ hook

    """
    if attrs is None:
        own_attrs = {}
    else:
        own_attrs = attrs.copy()

    self.action = action
    self.method = method
    self.enctype = enctype
    self.name = name
    self.attrs = own_attrs
    self._request_class = request_class

    # these attributes are used by zope.testbrowser
    # (all three are a semi-public API!)
    self._forms = forms
    self._labels = labels
    self._id_to_labels = id_to_labels

    # The controls list must exist before backwards_compat is assigned:
    # the __setattr__ hook iterates over self.controls.
    self.controls = []
    self.backwards_compat = backwards_compat

    self._urlunparse = urlparse.urlunparse
    self._urlparse = urlparse.urlparse
| 2678 |
def __getattr__(self, name):
    # Python only calls this hook after ordinary attribute lookup has
    # failed.  "backwards_compat" is never stored under that name (see
    # __setattr__), so it is served from the private attribute.
    if name != "backwards_compat":
        return getattr(HTMLForm, name)
    return self._backwards_compat
| 2683 |
| 2684 def __setattr__(self, name, value): |
| 2685 # yuck |
| 2686 if name == "backwards_compat": |
| 2687 name = "_backwards_compat" |
| 2688 value = bool(value) |
| 2689 for cc in self.controls: |
| 2690 try: |
| 2691 items = cc.items |
| 2692 except AttributeError: |
| 2693 continue |
| 2694 else: |
| 2695 for ii in items: |
| 2696 for ll in ii.get_labels(): |
| 2697 ll._backwards_compat = value |
| 2698 self.__dict__[name] = value |
| 2699 |
def new_control(self, type, name, attrs,
                ignore_unknown=False, select_default=False, index=None):
    """Create a control of the given type and add it to the form.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Note that controls representing lists of items are built up from
    controls holding only a single list item.  See ListControl.__doc__ for
    further information.

    type: type of control (see Control.__doc__ for a list); matched
     case-insensitively
    name: name passed to the control's constructor
    attrs: HTML attributes of control (a copy is given to the control)
    ignore_unknown: if true, use a dummy Control instance for controls of
     unknown type; otherwise, use a TextControl
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present (this defaulting happens when the HTMLForm.fixup method is
     called)
    index: index of corresponding element in HTML (see
     MoreFormTests.test_interspersed_controls for motivation)

    """
    control_type = type.lower()
    klass = self.type2class.get(control_type)
    if klass is None:
        # unknown type: fall back according to ignore_unknown
        if not ignore_unknown:
            klass = TextControl
        else:
            klass = IgnoreControl

    attrs_copy = attrs.copy()
    # ListControl subclasses take the extra select_default argument
    if issubclass(klass, ListControl):
        control = klass(control_type, name, attrs_copy, select_default,
                        index)
    else:
        control = klass(control_type, name, attrs_copy, index)

    # NOTE(review): a "select" arriving with exactly one attribute closes
    # the most recently added select control; the reason for the
    # len(attrs) == 1 test is not evident from this method -- confirm.
    if control_type == "select" and len(attrs) == 1:
        for earlier in reversed(self.controls):
            if earlier.type == "select":
                earlier.close_control()
                break

    control.add_to_form(self)
    control._urlparse = self._urlparse
    control._urlunparse = self._urlunparse
| 2747 |
def fixup(self):
    """Normalise the form once every control has been added.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Call this exactly once, after all controls have been added to the
    form.

    """
    for ctl in self.controls:
        ctl.fixup()
    # Assigning backwards_compat again re-runs the attribute hook now that
    # all controls are present.
    current = self._backwards_compat
    self.backwards_compat = current
| 2761 |
| 2762 #--------------------------------------------------- |
| 2763 def __str__(self): |
| 2764 header = "%s%s %s %s" % ( |
| 2765 (self.name and self.name+" " or ""), |
| 2766 self.method, self.action, self.enctype) |
| 2767 rep = [header] |
| 2768 for control in self.controls: |
| 2769 rep.append(" %s" % str(control)) |
| 2770 return "<%s>" % "\n".join(rep) |
| 2771 |
| 2772 #--------------------------------------------------- |
| 2773 # Form-filling methods. |
| 2774 |
| 2775 def __getitem__(self, name): |
| 2776 return self.find_control(name).value |
def __contains__(self, name):
    """Return whether the form has a control with the given name.

    A missing control yields False: a membership test should answer the
    question rather than raise ControlNotFoundError.  Any other exception
    raised by find_control still propagates.

    """
    try:
        control = self.find_control(name)
    except ControlNotFoundError:
        return False
    return bool(control)
| 2779 def __setitem__(self, name, value): |
| 2780 control = self.find_control(name) |
| 2781 try: |
| 2782 control.value = value |
| 2783 except AttributeError, e: |
| 2784 raise ValueError(str(e)) |
| 2785 |
def get_value(self,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False,  # by_label is deprecated
              label=None):
    """Return the value of the control selected by the arguments.

    If only the name argument is supplied, equivalent to

    form[name]

    If by_label is true (deprecated), the control's get_value_by_label
    method is used instead; NotImplementedError is raised if the control
    has no such method.

    """
    if by_label:
        deprecation("form.get_value_by_label(...)")
    control = self.find_control(name, type, kind, id, label=label, nr=nr)
    if not by_label:
        return control.value
    try:
        getter = control.get_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % control.name)
    return getter()
def set_value(self, value,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False,  # by_label is deprecated
              label=None):
    """Set the value of the control selected by the arguments.

    If only name and value arguments are supplied, equivalent to

    form[name] = value

    except that an AttributeError raised by the control is not converted
    to ValueError here.

    If by_label is true (deprecated), the control's set_value_by_label
    method is used instead; NotImplementedError is raised if the control
    has no such method.

    """
    if by_label:
        # The message names the replacement API; for a setter that is
        # set_value_by_label (it previously named the getter).
        deprecation("form.set_value_by_label(...)")
    c = self.find_control(name, type, kind, id, label=label, nr=nr)
    if by_label:
        try:
            meth = c.set_value_by_label
        except AttributeError:
            raise NotImplementedError(
                "control '%s' does not yet support by_label" % c.name)
        else:
            meth(value)
    else:
        c.value = value
def get_value_by_label(
    self, name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Return the by-label value of the control selected by the arguments.

    The control is located with find_control and its get_value_by_label
    method supplies the result.

    All arguments should be passed by name.

    """
    control = self.find_control(name, type, kind, id, label=label, nr=nr)
    by_label_value = control.get_value_by_label()
    return by_label_value
| 2843 |
def set_value_by_label(
    self, value,
    name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Set the by-label value of the control selected by the arguments.

    The control is located with find_control and value is handed to its
    set_value_by_label method.

    All arguments should be passed by name.

    """
    control = self.find_control(name, type, kind, id, label=label, nr=nr)
    control.set_value_by_label(value)
| 2854 |
def set_all_readonly(self, readonly):
    """Set the readonly flag of every control to bool(readonly)."""
    for ctl in self.controls:
        ctl.readonly = bool(readonly)
| 2858 |
def clear_all(self):
    """Call clear() on every control of the form, in order.

    See HTMLForm.clear.__doc__.

    """
    for ctl in self.controls:
        ctl.clear()
| 2867 |
def clear(self,
          name=None, type=None, kind=None, id=None, nr=None, label=None):
    """Clear the value attribute of the control selected by the arguments.

    The affected control will then not be successful until a value is
    subsequently set.  AttributeError is raised on readonly controls.

    """
    target = self.find_control(name, type, kind, id, label=label, nr=nr)
    target.clear()
| 2878 |
| 2879 |
| 2880 #--------------------------------------------------- |
| 2881 # Form-filling methods applying only to ListControls. |
| 2882 |
def possible_items(self,  # deprecated
                   name=None, type=None, kind=None, id=None,
                   nr=None, by_label=False, label=None):
    """Return a list of all values that the specified control can take."""
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    values = list_control.possible_items(by_label)
    return values
| 2889 |
def set(self, selected, item_name,  # deprecated
        name=None, type=None, kind=None, id=None, nr=None,
        by_label=False, label=None):
    """Select / deselect the named item of a list control.

    selected: boolean selected state
    item_name: name of the list item to change

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set(selected, item_name, by_label)
def toggle(self, item_name,  # deprecated
           name=None, type=None, kind=None, id=None, nr=None,
           by_label=False, label=None):
    """Toggle selected state of named list item."""
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle(item_name, by_label)
| 2906 |
def set_single(self, selected,  # deprecated
               name=None, type=None, kind=None, id=None,
               nr=None, by_label=None, label=None):
    """Select / deselect the only item of a single-item list control.

    If the control has multiple list items, ItemCountError is raised.

    This is just a convenience method, so you don't need to know the
    item's name -- the item name in these single-item controls is usually
    something meaningless like "1" or "on".

    For example, if a checkbox has a single item named "on", the following
    two calls are equivalent:

    control.set(True, "on")
    control.set_single(True)

    The by_label argument is ignored and deprecated.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set_single(selected)
def toggle_single(self, name=None, type=None, kind=None, id=None,
                  nr=None, by_label=None, label=None):  # deprecated
    """Toggle the only item of a single-item list control.

    The rest is as for HTMLForm.set_single.__doc__.  The by_label argument
    is ignored and deprecated.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle_single()
| 2935 |
| 2936 #--------------------------------------------------- |
| 2937 # Form-filling method applying only to FileControls. |
| 2938 |
def add_file(self, file_object, content_type=None, filename=None,
             name=None, id=None, nr=None, label=None):
    """Add a file to be uploaded by a file control.

    The control is looked up with type "file" using the name, id, label
    and nr arguments, and the remaining arguments are handed to its
    add_file method.

    file_object: file-like object (with read method) from which to read
     data to upload
    content_type: MIME content type of data to upload
    filename: filename to pass to server

    If filename is None, no filename is sent to the server.

    If content_type is None, the content type is guessed based on the
    filename and the data read from the file object.

    XXX
    At the moment, guessed content type is always application/octet-stream.
    Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
    plain text.

    Note the following useful HTML attributes of file upload controls (see
    HTML 4.01 spec, section 17):

    accept: comma-separated list of content types that the server will
     handle correctly; you can use this to filter out non-conforming files
    size: XXX IIRC, this is indicative of whether form wants multiple or
     single files
    maxlength: XXX hint of max content length in bytes?

    """
    file_control = self.find_control(
        name, "file", id=id, label=label, nr=nr)
    file_control.add_file(file_object, content_type, filename)
| 2970 |
| 2971 #--------------------------------------------------- |
| 2972 # Form submission methods, applying only to clickable controls. |
| 2973 |
def click(self, name=None, type=None, id=None, nr=0, coord=(1,1),
          request_class=_request.Request,
          label=None):
    """Return the request that would result from clicking on a control.

    The request object is built with the request class the form was
    created with (self._request_class); you can pass it to
    mechanize.urlopen.  Note that the request_class argument of this
    method is accepted but not used.

    Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
    IMAGEs) can be clicked.

    Will click on the first clickable control, subject to the name, type
    and nr arguments (as for find_control).  If no name, type, id, label
    or number is specified and there are no clickable controls, a request
    will be returned for the form in its current, un-clicked, state.

    ControlNotFoundError is raised if any of name, type, id, label or nr
    is specified but no matching control is found.  ValueError is raised
    if the HTMLForm has an enctype attribute that is not recognised.

    You can optionally specify a coordinate to click at, which only makes
    a difference if you clicked on an image.

    """
    return_type = "request"
    return self._click(name, type, id, label, nr, coord, return_type,
                       self._request_class)
| 3000 |
def click_request_data(self,
                       name=None, type=None, id=None,
                       nr=0, coord=(1,1),
                       request_class=_request.Request,
                       label=None):
    """As for click method, but return a tuple (url, data, headers).

    You can use this data to send a request to the server.  This is useful
    if you're using httplib or urllib rather than mechanize.  Otherwise,
    use the click method.  As with click, the request_class argument is
    accepted but not used.

    # Untested.  Have to subclass to add headers, I think -- so use
    # mechanize instead!
    import urllib
    url, data, hdrs = form.click_request_data()
    r = urllib.urlopen(url, data)

    # Untested.  I don't know of any reason to use httplib -- you can get
    # just as much control with mechanize.
    import httplib, urlparse
    url, data, hdrs = form.click_request_data()
    tup = urlparse.urlparse(url)
    host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
    conn = httplib.HTTPConnection(host)
    if data:
        conn.request("POST", path, data, hdrs)
    else:
        conn.request("GET", path, headers=hdrs)
    r = conn.getresponse()

    """
    return_type = "request_data"
    return self._click(name, type, id, label, nr, coord, return_type,
                       self._request_class)
| 3034 |
def click_pairs(self, name=None, type=None, id=None,
                nr=0, coord=(1,1),
                label=None):
    """As for click_request_data, but return a list of (key, value) pairs.

    You can use this list as an argument to urllib.urlencode.  This is
    usually only useful if you're using httplib or urllib rather than
    mechanize.  It may also be useful if you want to manually tweak the
    keys and/or values, but this should not be necessary.  Otherwise, use
    the click method.

    Note that this method is only useful for forms of MIME type
    x-www-form-urlencoded.  In particular, it does not return the
    information required for file upload.  If you need file upload and are
    not using mechanize, use click_request_data.
    """
    return_type = "pairs"
    return self._click(name, type, id, label, nr, coord, return_type,
                       self._request_class)
| 3053 |
| 3054 #--------------------------------------------------- |
| 3055 |
def find_control(self,
                 name=None, type=None, kind=None, id=None,
                 predicate=None, nr=None,
                 label=None):
    """Locate and return some specific control within the form.

    At least one of the name, type, kind, id, predicate, nr and label
    arguments must be supplied (i.e. be something other than None);
    otherwise ValueError is raised.  If no matching control is found,
    ControlNotFoundError is raised.

    If name is specified, then the control must have the indicated name.

    If type is specified then the control must have the specified type (in
    addition to the types possible for <input> HTML tags: "text",
    "password", "hidden", "submit", "image", "button", "radio", "checkbox",
    "file" we also have "reset", "buttonbutton", "submitbutton",
    "resetbutton", "textarea", "select" and "isindex").

    If kind is specified, then the control must fall into the specified
    group, each of which satisfies a particular interface.  The kinds are
    "text", "list", "multilist", "singlelist", "clickable" and "file".

    If id is specified, then the control must have the indicated id.

    If predicate is specified, then the control must match that function.
    The predicate function is passed the control as its single argument,
    and should return a boolean value indicating whether the control
    matched.

    nr, if supplied, is the sequence number of the control (where 0 is the
    first).  Note that control 0 is the first control matching all the
    other arguments (if supplied); it is not necessarily the first control
    in the form.  If no nr is supplied, AmbiguityError is raised if
    multiple controls match the other arguments (unless the
    .backwards_compat attribute is true).

    If label is specified, then the control must have this label.  Note
    that radio controls and checkboxes never have labels: their items do.

    """
    criteria = (name, type, kind, id, label, predicate, nr)
    supplied = [value for value in criteria if value is not None]
    if not supplied:
        raise ValueError(
            "at least one argument must be supplied to specify control")
    return self._find_control(name, type, kind, id, label, predicate, nr)
| 3102 |
| 3103 #--------------------------------------------------- |
| 3104 # Private methods. |
| 3105 |
def _find_list_control(self,
                       name=None, type=None, kind=None, id=None,
                       label=None, nr=None):
    # Like find_control, but the search always uses is_listcontrol as the
    # predicate.  At least one selector must be something other than None.
    selectors = (name, type, kind, id, label, nr)
    supplied = [value for value in selectors if value is not None]
    if not supplied:
        raise ValueError(
            "at least one argument must be supplied to specify control")

    return self._find_control(name, type, kind, id, label,
                              is_listcontrol, nr)
| 3116 |
def _find_control(self, name, type, kind, id, label, predicate, nr):
    # Core control search shared by find_control, _find_list_control and
    # _click.  Returns the matching control, or raises TypeError /
    # ValueError for bad arguments, AmbiguityError when several controls
    # match and no nr applies, and ControlNotFoundError when none match.

    # --- argument validation ---
    if name is not None and name is not Missing and not isstringlike(name):
        raise TypeError("control name must be string-like")
    if type is not None and not isstringlike(type):
        raise TypeError("control type must be string-like")
    if kind is not None and not isstringlike(kind):
        raise TypeError("control kind must be string-like")
    if id is not None and not isstringlike(id):
        raise TypeError("control id must be string-like")
    if label is not None and not isstringlike(label):
        raise TypeError("control label must be string-like")
    if predicate is not None and not callable(predicate):
        raise TypeError("control predicate must be callable")
    if nr is not None and nr < 0:
        raise ValueError("control number must be a positive integer")

    def matches(candidate):
        # True if candidate satisfies every criterion that was supplied.
        if name is not None and name != candidate.name:
            # a name of Missing still matches controls whose name is None
            if name is not Missing or candidate.name is not None:
                return False
        if type is not None and type != candidate.type:
            return False
        if kind is not None and not candidate.is_of_kind(kind):
            return False
        if id is not None and id != candidate.id:
            return False
        if predicate and not predicate(candidate):
            return False
        if label:
            # substring match against any of the control's labels
            for control_label in candidate.get_labels():
                if control_label.text.find(label) > -1:
                    return True
            return False
        return True

    orig_nr = nr
    if nr is None and self.backwards_compat:
        # in backwards-compatible mode the first match wins
        nr = 0

    found = None
    ambiguous = False
    for candidate in self.controls:
        if not matches(candidate):
            continue
        if nr is not None:
            if nr == 0:
                return candidate  # early exit: unambiguous due to nr
            nr -= 1
            continue
        if found:
            ambiguous = True
            break
        found = candidate

    if found and not ambiguous:
        return found

    # --- build the error message from the criteria that were given ---
    description = []
    if name is not None:
        description.append("name %s" % repr(name))
    if type is not None:
        description.append("type '%s'" % type)
    if kind is not None:
        description.append("kind '%s'" % kind)
    if id is not None:
        description.append("id '%s'" % id)
    if label is not None:
        description.append("label '%s'" % label)
    if predicate is not None:
        description.append("predicate %s" % predicate)
    if orig_nr:
        # note: a requested nr of 0 is falsy and so is not mentioned
        description.append("nr %d" % orig_nr)
    description = ", ".join(description)

    if ambiguous:
        raise AmbiguityError("more than one control matching "+description)
    if not found:
        raise ControlNotFoundError("no control matching "+description)
    assert False
| 3187 |
def _click(self, name, type, id, label, nr, coord, return_type,
           request_class=_request.Request):
    # Find the requested clickable control and delegate the click to it.
    # When no control was explicitly requested and the form has no
    # clickable control, the form's current un-clicked state is returned.
    try:
        control = self._find_control(
            name, type, "clickable", id, label, None, nr)
    except ControlNotFoundError:
        explicit = [arg for arg in (name, type, id, label)
                    if arg is not None]
        if explicit or nr != 0:
            # a specific control was asked for: report that it is missing
            raise
        return self._switch_click(return_type, request_class)
    return control._click(self, coord, return_type, request_class)
| 3202 |
| 3203 def _pairs(self): |
| 3204 """Return sequence of (key, value) pairs suitable for urlencoding.""" |
| 3205 return [(k, v) for (i, k, v, c_i) in self._pairs_and_controls()] |
| 3206 |
| 3207 |
| 3208 def _pairs_and_controls(self): |
| 3209 """Return sequence of (index, key, value, control_index) |
| 3210 of totally ordered pairs suitable for urlencoding. |
| 3211 |
| 3212 control_index is the index of the control in self.controls |
| 3213 """ |
| 3214 pairs = [] |
| 3215 for control_index in range(len(self.controls)): |
| 3216 control = self.controls[control_index] |
| 3217 for ii, key, val in control._totally_ordered_pairs(): |
| 3218 pairs.append((ii, key, val, control_index)) |
| 3219 |
| 3220 # stable sort by ONLY first item in tuple |
| 3221 pairs.sort() |
| 3222 |
| 3223 return pairs |
| 3224 |
| 3225 def _request_data(self): |
| 3226 """Return a tuple (url, data, headers).""" |
| 3227 method = self.method.upper() |
| 3228 #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.
action) |
| 3229 parts = self._urlparse(self.action) |
| 3230 rest, (query, frag) = parts[:-2], parts[-2:] |
| 3231 |
| 3232 if method == "GET": |
| 3233 if self.enctype != "application/x-www-form-urlencoded": |
| 3234 raise ValueError( |
| 3235 "unknown GET form encoding type '%s'" % self.enctype) |
| 3236 parts = rest + (urllib.urlencode(self._pairs()), None) |
| 3237 uri = self._urlunparse(parts) |
| 3238 return uri, None, [] |
| 3239 elif method == "POST": |
| 3240 parts = rest + (query, None) |
| 3241 uri = self._urlunparse(parts) |
| 3242 if self.enctype == "application/x-www-form-urlencoded": |
| 3243 return (uri, urllib.urlencode(self._pairs()), |
| 3244 [("Content-Type", self.enctype)]) |
| 3245 elif self.enctype == "multipart/form-data": |
| 3246 data = StringIO() |
| 3247 http_hdrs = [] |
| 3248 mw = MimeWriter(data, http_hdrs) |
| 3249 mw.startmultipartbody("form-data", add_to_http_hdrs=True, |
| 3250 prefix=0) |
| 3251 for ii, k, v, control_index in self._pairs_and_controls(): |
| 3252 self.controls[control_index]._write_mime_data(mw, k, v) |
| 3253 mw.lastpart() |
| 3254 return uri, data.getvalue(), http_hdrs |
| 3255 else: |
| 3256 raise ValueError( |
| 3257 "unknown POST form encoding type '%s'" % self.enctype) |
| 3258 else: |
| 3259 raise ValueError("Unknown method '%s'" % method) |
| 3260 |
def _switch_click(self, return_type, request_class=_request.Request):
    # This is called by HTMLForm and clickable Controls to hide switching
    # on return_type: "pairs" and "request_data" give the raw data, any
    # other value gives a request object built with request_class.
    if return_type == "pairs":
        return self._pairs()
    if return_type == "request_data":
        return self._request_data()

    req_data = self._request_data()
    req = request_class(req_data[0], req_data[1])
    for header_name, header_value in req_data[2]:
        adder = req.add_header
        if header_name.lower() == "content-type":
            # use add_unredirected_header for Content-Type when the
            # request class provides it (pre-2.4 and not using
            # ClientCookie: it does not)
            adder = getattr(req, "add_unredirected_header", adder)
        adder(header_name, header_value)
    return req
| OLD | NEW |