Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(485)

Side by Side Diff: components/autofill/content/renderer/form_autofill_util.cc

Issue 1012093004: Autofill: Improve the order of heuristics to apply when inferring labels (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: no redundant inferring labels from label tags Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « chrome/test/data/autofill/heuristics/output/bug_465587.out ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/autofill/content/renderer/form_autofill_util.h" 5 #include "components/autofill/content/renderer/form_autofill_util.h"
6 6
7 #include <map> 7 #include <map>
8 #include <set> 8 #include <set>
9 9
10 #include "base/command_line.h" 10 #include "base/command_line.h"
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 122
123 WebElement cur_element = parent_node.to<WebElement>(); 123 WebElement cur_element = parent_node.to<WebElement>();
124 if (cur_element.hasHTMLTagName("form") || 124 if (cur_element.hasHTMLTagName("form") ||
125 cur_element.hasHTMLTagName("fieldset")) { 125 cur_element.hasHTMLTagName("fieldset")) {
126 return true; 126 return true;
127 } 127 }
128 } 128 }
129 return false; 129 return false;
130 } 130 }
131 131
132 // Returns true if |node| is an element and it is a container type that
133 // InferLabelForElement() can traverse.
134 bool IsTraversableContainerElement(const WebNode& node) {
135 if (!node.isElementNode())
136 return false;
137
138 std::string tag_name = node.toConst<WebElement>().tagName().utf8();
139 return (tag_name == "DD" ||
Evan Stade 2015/03/18 01:51:29 nit: parens are unnecessary
Lei Zhang 2015/03/18 02:02:18 but the parens tell my editor to line up the entr
140 tag_name == "DIV" ||
141 tag_name == "FIELDSET" ||
142 tag_name == "LI" ||
143 tag_name == "TD" ||
144 tag_name == "TABLE");
145 }
146
132 // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement. 147 // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement.
133 bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) { 148 bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) {
134 return input_element.autoComplete(); 149 return input_element.autoComplete();
135 } 150 }
136 151
137 // Returns the colspan for a <td> / <th>. Defaults to 1. 152 // Returns the colspan for a <td> / <th>. Defaults to 1.
138 size_t CalculateTableCellColumnSpan(const WebElement& element) { 153 size_t CalculateTableCellColumnSpan(const WebElement& element) {
139 DCHECK(element.hasHTMLTagName("td") || element.hasHTMLTagName("th")); 154 DCHECK(element.hasHTMLTagName("td") || element.hasHTMLTagName("th"));
140 155
141 size_t span = 1; 156 size_t span = 1;
(...skipping 397 matching lines...) Expand 10 before | Expand all | Expand 10 after
539 // Because this is already traversing the <div> structure, if it finds a <label> 554 // Because this is already traversing the <div> structure, if it finds a <label>
540 // sibling along the way, infer from that <label>. 555 // sibling along the way, infer from that <label>.
541 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) { 556 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
542 WebNode node = element.parentNode(); 557 WebNode node = element.parentNode();
543 bool looking_for_parent = true; 558 bool looking_for_parent = true;
544 std::set<WebNode> divs_to_skip; 559 std::set<WebNode> divs_to_skip;
545 560
546 // Search the sibling and parent <div>s until we find a candidate label. 561 // Search the sibling and parent <div>s until we find a candidate label.
547 base::string16 inferred_label; 562 base::string16 inferred_label;
548 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div")); 563 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
549 CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
550 CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
551 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); 564 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
552 while (inferred_label.empty() && !node.isNull()) { 565 while (inferred_label.empty() && !node.isNull()) {
553 if (HasTagName(node, kDiv)) { 566 if (HasTagName(node, kDiv)) {
554 if (looking_for_parent) 567 if (looking_for_parent)
555 inferred_label = FindChildTextWithIgnoreList(node, divs_to_skip); 568 inferred_label = FindChildTextWithIgnoreList(node, divs_to_skip);
556 else 569 else
557 inferred_label = FindChildText(node); 570 inferred_label = FindChildText(node);
558 571
559 // Avoid sibling DIVs that contain autofillable fields. 572 // Avoid sibling DIVs that contain autofillable fields.
560 if (!looking_for_parent && !inferred_label.empty()) { 573 if (!looking_for_parent && !inferred_label.empty()) {
561 CR_DEFINE_STATIC_LOCAL(WebString, kSelector, 574 CR_DEFINE_STATIC_LOCAL(WebString, kSelector,
562 ("input, select, textarea")); 575 ("input, select, textarea"));
563 blink::WebExceptionCode ec = 0; 576 blink::WebExceptionCode ec = 0;
564 WebElement result_element = node.querySelector(kSelector, ec); 577 WebElement result_element = node.querySelector(kSelector, ec);
565 if (!result_element.isNull()) { 578 if (!result_element.isNull()) {
566 inferred_label.clear(); 579 inferred_label.clear();
567 divs_to_skip.insert(node); 580 divs_to_skip.insert(node);
568 } 581 }
569 } 582 }
570 583
571 looking_for_parent = false; 584 looking_for_parent = false;
572 } else if (!looking_for_parent && HasTagName(node, kLabel)) { 585 } else if (!looking_for_parent && HasTagName(node, kLabel)) {
573 WebLabelElement label_element = node.to<WebLabelElement>(); 586 WebLabelElement label_element = node.to<WebLabelElement>();
574 if (label_element.correspondingControl().isNull()) 587 if (label_element.correspondingControl().isNull())
575 inferred_label = FindChildText(node); 588 inferred_label = FindChildText(node);
576 } else if (looking_for_parent && 589 } else if (looking_for_parent && IsTraversableContainerElement(node)) {
577 (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) { 590 // If the element is in a non-div container, its label most likely is too.
578 // If the element is in a table or fieldset, its label most likely is too.
579 break; 591 break;
580 } 592 }
581 593
582 if (node.previousSibling().isNull()) { 594 if (node.previousSibling().isNull()) {
583 // If there are no more siblings, continue walking up the tree. 595 // If there are no more siblings, continue walking up the tree.
584 looking_for_parent = true; 596 looking_for_parent = true;
585 } 597 }
586 598
587 node = looking_for_parent ? node.parentNode() : node.previousSibling(); 599 node = looking_for_parent ? node.parentNode() : node.previousSibling();
588 } 600 }
(...skipping 21 matching lines...) Expand all
610 while (!previous.isNull() && previous.isTextNode()) 622 while (!previous.isNull() && previous.isTextNode())
611 previous = previous.previousSibling(); 623 previous = previous.previousSibling();
612 624
613 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt")); 625 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
614 if (previous.isNull() || !HasTagName(previous, kDefinitionTag)) 626 if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
615 return base::string16(); 627 return base::string16();
616 628
617 return FindChildText(previous); 629 return FindChildText(previous);
618 } 630 }
619 631
620 // Returns true if the closest ancestor is a <div> and not a <td>. 632 // Returns the element type for all ancestor nodes in CAPS, starting with the
621 // Returns false if the closest ancestor is a <td> tag, 633 // parent node.
622 // or if there is no <div> or <td> ancestor. 634 std::vector<std::string> AncestorTagNames(
623 bool ClosestAncestorIsDivAndNotTD(const WebFormControlElement& element) { 635 const WebFormControlElement& element) {
636 std::vector<std::string> tag_names;
624 for (WebNode parent_node = element.parentNode(); 637 for (WebNode parent_node = element.parentNode();
625 !parent_node.isNull(); 638 !parent_node.isNull();
626 parent_node = parent_node.parentNode()) { 639 parent_node = parent_node.parentNode()) {
627 if (!parent_node.isElementNode()) 640 if (!parent_node.isElementNode())
628 continue; 641 continue;
629 642
630 WebElement cur_element = parent_node.to<WebElement>(); 643 tag_names.push_back(parent_node.to<WebElement>().tagName().utf8());
631 if (cur_element.hasHTMLTagName("div"))
632 return true;
633 if (cur_element.hasHTMLTagName("td"))
634 return false;
635 } 644 }
636 return false; 645 return tag_names;
637 } 646 }
638 647
639 // Infers corresponding label for |element| from surrounding context in the DOM, 648 // Infers corresponding label for |element| from surrounding context in the DOM,
640 // e.g. the contents of the preceding <p> tag or text element. 649 // e.g. the contents of the preceding <p> tag or text element.
641 base::string16 InferLabelForElement(const WebFormControlElement& element) { 650 base::string16 InferLabelForElement(const WebFormControlElement& element) {
642 base::string16 inferred_label; 651 base::string16 inferred_label;
643 if (IsCheckableElement(toWebInputElement(&element))) { 652 if (IsCheckableElement(toWebInputElement(&element))) {
644 inferred_label = InferLabelFromNext(element); 653 inferred_label = InferLabelFromNext(element);
645 if (!inferred_label.empty()) 654 if (!inferred_label.empty())
646 return inferred_label; 655 return inferred_label;
647 } 656 }
648 657
649 inferred_label = InferLabelFromPrevious(element); 658 inferred_label = InferLabelFromPrevious(element);
650 if (!inferred_label.empty()) 659 if (!inferred_label.empty())
651 return inferred_label; 660 return inferred_label;
652 661
653 // If we didn't find a label, check for placeholder text. 662 // If we didn't find a label, check for placeholder text.
654 inferred_label = InferLabelFromPlaceholder(element); 663 inferred_label = InferLabelFromPlaceholder(element);
655 if (!inferred_label.empty()) 664 if (!inferred_label.empty())
656 return inferred_label; 665 return inferred_label;
657 666
658 // If we didn't find a label, check for list item case. 667 // For all other searches that involve traversing up the tree, the search
659 inferred_label = InferLabelFromListItem(element); 668 // order is based on which tag is the closest ancestor to |element|.
660 if (!inferred_label.empty()) 669 std::vector<std::string> tag_names = AncestorTagNames(element);
661 return inferred_label; 670 std::set<std::string> seen_tag_names;
671 for (const std::string& tag_name : tag_names) {
672 if (ContainsKey(seen_tag_names, tag_name))
673 continue;
662 674
663 // If we didn't find a label, check for definition list case. 675 seen_tag_names.insert(tag_name);
664 inferred_label = InferLabelFromDefinitionList(element); 676 if (tag_name == "DIV") {
665 if (!inferred_label.empty()) 677 inferred_label = InferLabelFromDivTable(element);
666 return inferred_label; 678 } else if (tag_name == "TD") {
679 inferred_label = InferLabelFromTableColumn(element);
680 if (inferred_label.empty())
681 inferred_label = InferLabelFromTableRow(element);
682 } else if (tag_name == "DD") {
683 inferred_label = InferLabelFromDefinitionList(element);
684 } else if (tag_name == "LI") {
685 inferred_label = InferLabelFromListItem(element);
686 } else if (tag_name == "FIELDSET") {
687 break;
688 }
667 689
668 bool check_div_first = ClosestAncestorIsDivAndNotTD(element);
669 if (check_div_first) {
670 // If we didn't find a label, check for div table case first since it's the
671 // closest ancestor.
672 inferred_label = InferLabelFromDivTable(element);
673 if (!inferred_label.empty()) 690 if (!inferred_label.empty())
674 return inferred_label; 691 break;
675 } 692 }
676 693
677 // If we didn't find a label, check for table cell case.
678 inferred_label = InferLabelFromTableColumn(element);
679 if (!inferred_label.empty())
680 return inferred_label;
681
682 // If we didn't find a label, check for table row case.
683 inferred_label = InferLabelFromTableRow(element);
684 if (!inferred_label.empty())
685 return inferred_label;
686
687 if (!check_div_first) {
688 // If we didn't find a label from the table, check for div table case if we
689 // haven't already.
690 inferred_label = InferLabelFromDivTable(element);
691 }
692 return inferred_label; 694 return inferred_label;
693 } 695 }
694 696
695 // Fills |option_strings| with the values of the <option> elements present in 697 // Fills |option_strings| with the values of the <option> elements present in
696 // |select_element|. 698 // |select_element|.
697 void GetOptionStringsFromElement(const WebSelectElement& select_element, 699 void GetOptionStringsFromElement(const WebSelectElement& select_element,
698 std::vector<base::string16>* option_values, 700 std::vector<base::string16>* option_values,
699 std::vector<base::string16>* option_contents) { 701 std::vector<base::string16>* option_contents) {
700 DCHECK(!select_element.isNull()); 702 DCHECK(!select_element.isNull());
701 703
(...skipping 836 matching lines...) Expand 10 before | Expand all | Expand 10 after
1538 1540
1539 gfx::RectF GetScaledBoundingBox(float scale, WebElement* element) { 1541 gfx::RectF GetScaledBoundingBox(float scale, WebElement* element) {
1540 gfx::Rect bounding_box(element->boundsInViewportSpace()); 1542 gfx::Rect bounding_box(element->boundsInViewportSpace());
1541 return gfx::RectF(bounding_box.x() * scale, 1543 return gfx::RectF(bounding_box.x() * scale,
1542 bounding_box.y() * scale, 1544 bounding_box.y() * scale,
1543 bounding_box.width() * scale, 1545 bounding_box.width() * scale,
1544 bounding_box.height() * scale); 1546 bounding_box.height() * scale);
1545 } 1547 }
1546 1548
1547 } // namespace autofill 1549 } // namespace autofill
OLDNEW
« no previous file with comments | « chrome/test/data/autofill/heuristics/output/bug_465587.out ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698