Index: chrome/renderer/autofill/form_manager.cc |
diff --git a/chrome/renderer/autofill/form_manager.cc b/chrome/renderer/autofill/form_manager.cc |
index 53884db4f166cc3f861aebe2c6ea2c5063fb7235..3cb6ceac912c42f8971b1c1c88fc291dc3b78803 100644 |
--- a/chrome/renderer/autofill/form_manager.cc |
+++ b/chrome/renderer/autofill/form_manager.cc |
@@ -78,14 +78,6 @@ bool IsSelectElement(const WebFormControlElement& element) { |
return element.formControlType() == ASCIIToUTF16("select-one"); |
} |
-bool IsTextContainerElement(const WebElement& element) { |
- return |
- element.hasTagName("p") || |
- element.hasTagName("b") || |
- element.hasTagName("span") || |
- element.hasTagName("font"); |
-} |
- |
bool IsOptionElement(const WebElement& element) { |
return element.hasTagName("option"); |
} |
@@ -107,6 +99,31 @@ bool IsAutofillableElement(const WebFormControlElement& element) { |
return IsTextInput(input_element) || IsSelectElement(element); |
} |
+// Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed |
+// to a single space. If |force_whitespace| is true, then the resulting string |
+// is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the |
+// result includes a space only if |prefix| has trailing whitespace or |suffix| |
+// has leading whitespace. |
dhollowa
2011/08/01 22:16:54
Some examples in this comment would help the casua
Ilya Sherman
2011/08/01 23:59:36
Done.
|
+const string16 CombineAndCollapseWhitespace(const string16& prefix, |
+ const string16& suffix, |
+ bool force_whitespace) { |
+ string16 prefix_trimmed; |
+ TrimPositions prefix_trailing_whitespace = |
+ TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed); |
+ |
+ // Recursively compute the children's text. |
+ string16 suffix_trimmed; |
+ TrimPositions suffix_leading_whitespace = |
+ TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed); |
+ |
+ if (prefix_trailing_whitespace || suffix_leading_whitespace || |
+ force_whitespace) { |
+ return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed; |
+ } else { |
+ return prefix_trimmed + suffix_trimmed; |
+ } |
+} |
+ |
// This is a helper function for the FindChildText() function (see below). |
// Search depth is limited with the |depth| parameter. |
string16 FindChildTextInner(const WebNode& node, int depth) { |
@@ -126,39 +143,27 @@ string16 FindChildTextInner(const WebNode& node, int depth) { |
const WebElement element = node.toConst<WebElement>(); |
if (IsOptionElement(element) || |
IsScriptElement(element) || |
- IsNoScriptElement(element)) { |
+ IsNoScriptElement(element) || |
+ (element.isFormControlElement() && |
+ IsAutofillableElement(element.toConst<WebFormControlElement>()))) { |
return string16(); |
} |
} |
// Extract the text exactly at this node. |
string16 node_text = node.nodeValue(); |
- TrimPositions node_trailing_whitespace = |
- TrimWhitespace(node_text, TRIM_TRAILING, &node_text); |
// Recursively compute the children's text. |
// Preserve inter-element whitespace separation. |
string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); |
- TrimPositions child_leading_whitespace = |
- TrimWhitespace(child_text, TRIM_LEADING, &child_text); |
- if (node_trailing_whitespace || child_leading_whitespace || |
- (node.nodeType() == WebNode::TextNode && node_text.empty())) { |
- node_text += ASCIIToUTF16(" "); |
- } |
- node_text += child_text; |
- node_trailing_whitespace = |
- TrimWhitespace(node_text, TRIM_TRAILING, &node_text); |
+ bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
+ node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); |
// Recursively compute the siblings' text. |
// Again, preserve inter-element whitespace separation. |
string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); |
- TrimPositions sibling_leading_whitespace = |
- TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text); |
- if (node_trailing_whitespace || sibling_leading_whitespace || |
- (node.nodeType() == WebNode::TextNode && node_text.empty())) { |
- node_text += ASCIIToUTF16(" "); |
- } |
- node_text += sibling_text; |
+ add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
+ node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); |
return node_text; |
} |
@@ -169,85 +174,72 @@ string16 FindChildTextInner(const WebNode& node, int depth) { |
// used when the structure is not directly known. However, unlike with |
// |innerText()|, the search depth and breadth are limited to a fixed threshold. |
// Whitespace is trimmed from text accumulated at descendant nodes. |
-string16 FindChildText(const WebElement& element) { |
- WebNode child = element.firstChild(); |
+string16 FindChildText(const WebNode& node) { |
+ if (node.isTextNode()) |
+ return node.nodeValue(); |
+ |
+ WebNode child = node.firstChild(); |
const int kChildSearchDepth = 10; |
- string16 element_text = FindChildTextInner(child, kChildSearchDepth); |
- TrimWhitespace(element_text, TRIM_ALL, &element_text); |
- return element_text; |
+ string16 node_text = FindChildTextInner(child, kChildSearchDepth); |
+ TrimWhitespace(node_text, TRIM_ALL, &node_text); |
+ return node_text; |
} |
// Helper for |InferLabelForElement()| that infers a label, if possible, from |
-// a previous sibling of |element|. |
+// a previous sibling of |element|, |
+// e.g. Some Text <input ...> |
+// or Some <span>Text</span> <input ...> |
+// or <p>Some Text</p><input ...> |
+// or <label>Some Text</label> <input ...> |
+// or Some Text <img><input ...> |
+// or <b>Some Text</b><br/> <input ...>. |
string16 InferLabelFromPrevious(const WebFormControlElement& element) { |
string16 inferred_label; |
- WebNode previous = element.previousSibling(); |
- if (previous.isNull()) |
- return string16(); |
- |
- // Check for text immediately before the |element|. |
- if (previous.isTextNode()) { |
- inferred_label = previous.nodeValue(); |
- TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); |
- } |
- |
- // If we didn't find text, check for an immediately preceding text container, |
- // e.g. <p>Some Text</p><input ...> |
- // Note the lack of whitespace between <p> and <input> elements. |
- if (inferred_label.empty() && previous.isElementNode()) { |
- WebElement previous_element = previous.to<WebElement>(); |
- if (IsTextContainerElement(previous_element)) |
- inferred_label = FindChildText(previous_element); |
- } |
- |
- // If we didn't find one immediately preceding, check for a text container |
- // separated from this node only by whitespace, |
- // e.g. <p>Some Text</p> <input ...> |
- // Note the whitespace between <p> and <input> elements. |
- if (inferred_label.empty() && previous.isTextNode()) { |
- WebNode sibling = previous.previousSibling(); |
- if (!sibling.isNull() && sibling.isElementNode()) { |
- WebElement previous_element = sibling.to<WebElement>(); |
- if (IsTextContainerElement(previous_element)) |
- inferred_label = FindChildText(previous_element); |
- } |
- } |
+ WebNode previous = element; |
+ while (true) { |
+ previous = previous.previousSibling(); |
+ if (previous.isNull()) |
+ break; |
- // Look for a text node prior to <img> or <br> tags, |
- // e.g. Some Text<img/><input ...> or Some Text<br/><input ...> |
- while (inferred_label.empty() && !previous.isNull()) { |
- if (previous.isTextNode()) { |
- inferred_label = previous.nodeValue(); |
- TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); |
- } else if (previous.isElementNode()) { |
- WebElement previous_element = previous.to<WebElement>(); |
- if (IsTextContainerElement(previous_element)) |
- inferred_label = FindChildText(previous_element); |
- else if (!HasTagName(previous, "img") && !HasTagName(previous, "br")) |
- break; |
- } else { |
+ WebNode::NodeType node_type = previous.nodeType(); |
+ if (node_type != WebNode::TextNode && |
+ node_type != WebNode::ElementNode && |
+ node_type != WebNode::CommentNode) |
break; |
- } |
- previous = previous.previousSibling(); |
- } |
+ // Coalesce any text contained in multiple consecutive plain text nodes or |
+ // on of a few HTML elements that are essentially equivalent to text nodes. |
+ if (previous.isTextNode() || |
+ HasTagName(previous, "b") || HasTagName(previous, "strong") || |
+ HasTagName(previous, "span") || HasTagName(previous, "font")) { |
+ string16 value = FindChildText(previous); |
+ // A text node's value will be empty if it is for a line break. |
+ bool add_space = previous.isTextNode() && value.empty(); |
+ inferred_label = |
+ CombineAndCollapseWhitespace(value, inferred_label, add_space); |
+ } else if (previous.isElementNode()) { |
dhollowa
2011/08/01 22:16:54
Instead of "else if", it would be more readable IM
Ilya Sherman
2011/08/01 23:59:36
Done.
|
+ // All other elements are only allowed if we have not yet found any |
+ // candidate label text. |
+ string16 trimmed_label; |
+ TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label); |
+ if (trimmed_label.empty()) { |
+ // <img> and <br> tags often appear between the input element and its |
+ // label text, so skip over them. |
+ if (HasTagName(previous, "img") || HasTagName(previous, "br")) |
+ continue; |
+ |
+ // We expect <p> and <label> tags to contain the full label text, so |
+ // only allow these if we have not yet found any candidate label text. |
+ if (HasTagName(previous, "p") || HasTagName(previous, "label")) |
+ inferred_label = FindChildText(previous); |
+ } |
- // Look for a label node prior to the <input> tag, |
- // e.g. <label>Some Text</label><input ...> |
- while (inferred_label.empty() && !previous.isNull()) { |
- if (previous.isTextNode()) { |
- inferred_label = previous.nodeValue(); |
- TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); |
- } else if (HasTagName(previous, "label")) { |
- inferred_label = FindChildText(previous.to<WebElement>()); |
- } else { |
break; |
} |
- |
- previous = previous.previousSibling(); |
} |
+ TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); |
return inferred_label; |
} |
@@ -262,7 +254,7 @@ string16 InferLabelFromListItem(const WebFormControlElement& element) { |
} |
if (!parent.isNull() && HasTagName(parent, "li")) |
- return FindChildText(parent.to<WebElement>()); |
+ return FindChildText(parent); |
return string16(); |
} |
@@ -289,7 +281,7 @@ string16 InferLabelFromTableColumn(const WebFormControlElement& element) { |
WebNode previous = parent.previousSibling(); |
while (inferred_label.empty() && !previous.isNull()) { |
if (HasTagName(previous, "td") || HasTagName(previous, "th")) |
- inferred_label = FindChildText(previous.to<WebElement>()); |
+ inferred_label = FindChildText(previous); |
previous = previous.previousSibling(); |
} |
@@ -316,7 +308,7 @@ string16 InferLabelFromTableRow(const WebFormControlElement& element) { |
WebNode previous = parent.previousSibling(); |
while (inferred_label.empty() && !previous.isNull()) { |
if (HasTagName(previous, "tr")) |
- inferred_label = FindChildText(previous.to<WebElement>()); |
+ inferred_label = FindChildText(previous); |
previous = previous.previousSibling(); |
} |
@@ -330,23 +322,29 @@ string16 InferLabelFromTableRow(const WebFormControlElement& element) { |
// e.g. <div>Some Text</div><div><input ...></div> |
string16 InferLabelFromDivTable(const WebFormControlElement& element) { |
WebNode node = element.parentNode(); |
- while (!node.isNull() && node.isElementNode() && |
- !node.to<WebElement>().hasTagName("div") && |
- // If the element is in a table, its label most likely is too. |
- !node.to<WebElement>().hasTagName("table")) { |
- node = node.parentNode(); |
- } |
+ bool looking_for_parent = true; |
- if (node.isNull() || !HasTagName(node, "div")) |
- return string16(); |
- |
- // Search the siblings while we cannot find label. |
+ // Search the sibling and parent <div>s until we find a candidate label. |
string16 inferred_label; |
while (inferred_label.empty() && !node.isNull()) { |
- if (HasTagName(node, "div")) |
- inferred_label = FindChildText(node.to<WebElement>()); |
+ if (HasTagName(node, "div")) { |
+ looking_for_parent = false; |
+ inferred_label = FindChildText(node); |
+ } else if (looking_for_parent && |
+ (HasTagName(node, "table") || HasTagName(node, "fieldset"))) { |
+ // If the element is in a table or fieldset, its label most likely is too. |
+ break; |
+ } |
+ |
+ if (node.previousSibling().isNull()) { |
+ // If there are no more siblings, continue walking up the tree. |
+ looking_for_parent = true; |
+ } |
- node = node.previousSibling(); |
+ if (looking_for_parent) |
+ node = node.parentNode(); |
+ else |
+ node = node.previousSibling(); |
} |
return inferred_label; |
@@ -373,7 +371,7 @@ string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { |
if (previous.isNull() || !HasTagName(previous, "dt")) |
return string16(); |
- return FindChildText(previous.to<WebElement>()); |
+ return FindChildText(previous); |
} |
// Infers corresponding label for |element| from surrounding context in the DOM, |
@@ -645,9 +643,12 @@ string16 FormManager::LabelForElement(const WebFormControlElement& element) { |
WebNodeList labels = element.document().getElementsByTagName("label"); |
for (unsigned i = 0; i < labels.length(); ++i) { |
WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
- DCHECK(label.hasTagName("label")); |
- if (label.correspondingControl() == element) |
+ WebElement corresponding_control = label.correspondingControl(); |
+ if (corresponding_control == element || |
+ (corresponding_control.isNull() && |
+ label.getAttribute("for") == element.nameForAutofill())) { |
return FindChildText(label); |
+ } |
} |
// Infer the label from context if not found in label element. |
@@ -732,13 +733,21 @@ bool FormManager::WebFormElementToFormData(const WebFormElement& element, |
WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
WebFormControlElement field_element = |
label.correspondingControl().to<WebFormControlElement>(); |
- if (field_element.isNull() || |
+ |
+ string16 element_name; |
+ if (field_element.isNull()) { |
+ // Sometimes site authors will incorrectly specify the corresponding |
+ // field element's name rather than its id, so we compensate here. |
+ element_name = label.getAttribute("for"); |
+ } else if ( |
!field_element.isFormControlElement() || |
- field_element.formControlType() == WebString::fromUTF8("hidden")) |
+ field_element.formControlType() == WebString::fromUTF8("hidden")) { |
continue; |
+ } else { |
+ element_name = field_element.nameForAutofill(); |
+ } |
- std::map<string16, FormField*>::iterator iter = |
- name_map.find(field_element.nameForAutofill()); |
+ std::map<string16, FormField*>::iterator iter = name_map.find(element_name); |
// Concatenate labels because some sites might have multiple label |
// candidates. |
if (iter != name_map.end()) |