| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/autofill/form_manager.h" | 5 #include "chrome/renderer/autofill/form_manager.h" |
| 6 | 6 |
| 7 #include "base/logging.h" | 7 #include "base/logging.h" |
| 8 #include "base/memory/scoped_vector.h" | 8 #include "base/memory/scoped_vector.h" |
| 9 #include "base/stl_util.h" | 9 #include "base/stl_util.h" |
| 10 #include "base/string_util.h" | 10 #include "base/string_util.h" |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 71 if (!element) | 71 if (!element) |
| 72 return false; | 72 return false; |
| 73 | 73 |
| 74 return element->isTextField() && !element->isPasswordField(); | 74 return element->isTextField() && !element->isPasswordField(); |
| 75 } | 75 } |
| 76 | 76 |
| 77 bool IsSelectElement(const WebFormControlElement& element) { | 77 bool IsSelectElement(const WebFormControlElement& element) { |
| 78 return element.formControlType() == ASCIIToUTF16("select-one"); | 78 return element.formControlType() == ASCIIToUTF16("select-one"); |
| 79 } | 79 } |
| 80 | 80 |
| 81 bool IsTextContainerElement(const WebElement& element) { | |
| 82 return | |
| 83 element.hasTagName("p") || | |
| 84 element.hasTagName("b") || | |
| 85 element.hasTagName("span") || | |
| 86 element.hasTagName("font"); | |
| 87 } | |
| 88 | |
| 89 bool IsOptionElement(const WebElement& element) { | 81 bool IsOptionElement(const WebElement& element) { |
| 90 return element.hasTagName("option"); | 82 return element.hasTagName("option"); |
| 91 } | 83 } |
| 92 | 84 |
| 93 bool IsScriptElement(const WebElement& element) { | 85 bool IsScriptElement(const WebElement& element) { |
| 94 return element.hasTagName("script"); | 86 return element.hasTagName("script"); |
| 95 } | 87 } |
| 96 | 88 |
| 97 bool IsNoScriptElement(const WebElement& element) { | 89 bool IsNoScriptElement(const WebElement& element) { |
| 98 return element.hasTagName("noscript"); | 90 return element.hasTagName("noscript"); |
| 99 } | 91 } |
| 100 | 92 |
| 101 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { | 93 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { |
| 102 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); | 94 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); |
| 103 } | 95 } |
| 104 | 96 |
| 105 bool IsAutofillableElement(const WebFormControlElement& element) { | 97 bool IsAutofillableElement(const WebFormControlElement& element) { |
| 106 const WebInputElement* input_element = toWebInputElement(&element); | 98 const WebInputElement* input_element = toWebInputElement(&element); |
| 107 return IsTextInput(input_element) || IsSelectElement(element); | 99 return IsTextInput(input_element) || IsSelectElement(element); |
| 108 } | 100 } |
| 109 | 101 |
| 102 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed |
| 103 // to a single space. If |force_whitespace| is true, then the resulting string |
| 104 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the |
| 105 // result includes a space only if |prefix| has trailing whitespace or |suffix| |
| 106 // has leading whitespace. |
| 107 // A few examples: |
| 108 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar" |
| 109 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar" |
| 110 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar" |
| 111 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar" |
| 112 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar" |
| 113 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar" |
| 114 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar " |
| 115 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar " |
| 116 const string16 CombineAndCollapseWhitespace(const string16& prefix, |
| 117 const string16& suffix, |
| 118 bool force_whitespace) { |
| 119 string16 prefix_trimmed; |
| 120 TrimPositions prefix_trailing_whitespace = |
| 121 TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed); |
| 122 |
| 123 // Trim leading whitespace from |suffix|, noting whether any was present. |
| 124 string16 suffix_trimmed; |
| 125 TrimPositions suffix_leading_whitespace = |
| 126 TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed); |
| 127 |
| 128 if (prefix_trailing_whitespace || suffix_leading_whitespace || |
| 129 force_whitespace) { |
| 130 return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed; |
| 131 } else { |
| 132 return prefix_trimmed + suffix_trimmed; |
| 133 } |
| 134 } |
| 135 |
| 110 // This is a helper function for the FindChildText() function (see below). | 136 // This is a helper function for the FindChildText() function (see below). |
| 111 // Search depth is limited with the |depth| parameter. | 137 // Search depth is limited with the |depth| parameter. |
| 112 string16 FindChildTextInner(const WebNode& node, int depth) { | 138 string16 FindChildTextInner(const WebNode& node, int depth) { |
| 113 if (depth <= 0 || node.isNull()) | 139 if (depth <= 0 || node.isNull()) |
| 114 return string16(); | 140 return string16(); |
| 115 | 141 |
| 116 // Skip over comments. | 142 // Skip over comments. |
| 117 if (node.nodeType() == WebNode::CommentNode) | 143 if (node.nodeType() == WebNode::CommentNode) |
| 118 return FindChildTextInner(node.nextSibling(), depth - 1); | 144 return FindChildTextInner(node.nextSibling(), depth - 1); |
| 119 | 145 |
| 120 if (node.nodeType() != WebNode::ElementNode && | 146 if (node.nodeType() != WebNode::ElementNode && |
| 121 node.nodeType() != WebNode::TextNode) | 147 node.nodeType() != WebNode::TextNode) |
| 122 return string16(); | 148 return string16(); |
| 123 | 149 |
| 124 // Ignore elements known not to contain inferable labels. | 150 // Ignore elements known not to contain inferable labels. |
| 125 if (node.isElementNode()) { | 151 if (node.isElementNode()) { |
| 126 const WebElement element = node.toConst<WebElement>(); | 152 const WebElement element = node.toConst<WebElement>(); |
| 127 if (IsOptionElement(element) || | 153 if (IsOptionElement(element) || |
| 128 IsScriptElement(element) || | 154 IsScriptElement(element) || |
| 129 IsNoScriptElement(element)) { | 155 IsNoScriptElement(element) || |
| 156 (element.isFormControlElement() && |
| 157 IsAutofillableElement(element.toConst<WebFormControlElement>()))) { |
| 130 return string16(); | 158 return string16(); |
| 131 } | 159 } |
| 132 } | 160 } |
| 133 | 161 |
| 134 // Extract the text exactly at this node. | 162 // Extract the text exactly at this node. |
| 135 string16 node_text = node.nodeValue(); | 163 string16 node_text = node.nodeValue(); |
| 136 TrimPositions node_trailing_whitespace = | |
| 137 TrimWhitespace(node_text, TRIM_TRAILING, &node_text); | |
| 138 | 164 |
| 139 // Recursively compute the children's text. | 165 // Recursively compute the children's text. |
| 140 // Preserve inter-element whitespace separation. | 166 // Preserve inter-element whitespace separation. |
| 141 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); | 167 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); |
| 142 TrimPositions child_leading_whitespace = | 168 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
| 143 TrimWhitespace(child_text, TRIM_LEADING, &child_text); | 169 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); |
| 144 if (node_trailing_whitespace || child_leading_whitespace || | |
| 145 (node.nodeType() == WebNode::TextNode && node_text.empty())) { | |
| 146 node_text += ASCIIToUTF16(" "); | |
| 147 } | |
| 148 node_text += child_text; | |
| 149 node_trailing_whitespace = | |
| 150 TrimWhitespace(node_text, TRIM_TRAILING, &node_text); | |
| 151 | 170 |
| 152 // Recursively compute the siblings' text. | 171 // Recursively compute the siblings' text. |
| 153 // Again, preserve inter-element whitespace separation. | 172 // Again, preserve inter-element whitespace separation. |
| 154 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); | 173 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); |
| 155 TrimPositions sibling_leading_whitespace = | 174 add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
| 156 TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text); | 175 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); |
| 157 if (node_trailing_whitespace || sibling_leading_whitespace || | |
| 158 (node.nodeType() == WebNode::TextNode && node_text.empty())) { | |
| 159 node_text += ASCIIToUTF16(" "); | |
| 160 } | |
| 161 node_text += sibling_text; | |
| 162 | 176 |
| 163 return node_text; | 177 return node_text; |
| 164 } | 178 } |
| 165 | 179 |
| 166 // Returns the aggregated values of the descendants of |element| that are | 180 // Returns the aggregated values of the descendants of |element| that are |
| 167 // non-empty text nodes. This is a faster alternative to |innerText()| for | 181 // non-empty text nodes. This is a faster alternative to |innerText()| for |
| 168 // performance critical operations. It does a full depth-first search so can be | 182 // performance critical operations. It does a full depth-first search so can be |
| 169 // used when the structure is not directly known. However, unlike with | 183 // used when the structure is not directly known. However, unlike with |
| 170 // |innerText()|, the search depth and breadth are limited to a fixed threshold. | 184 // |innerText()|, the search depth and breadth are limited to a fixed threshold. |
| 171 // Whitespace is trimmed from text accumulated at descendant nodes. | 185 // Whitespace is trimmed from text accumulated at descendant nodes. |
| 172 string16 FindChildText(const WebElement& element) { | 186 string16 FindChildText(const WebNode& node) { |
| 173 WebNode child = element.firstChild(); | 187 if (node.isTextNode()) |
| 188 return node.nodeValue(); |
| 189 |
| 190 WebNode child = node.firstChild(); |
| 174 | 191 |
| 175 const int kChildSearchDepth = 10; | 192 const int kChildSearchDepth = 10; |
| 176 string16 element_text = FindChildTextInner(child, kChildSearchDepth); | 193 string16 node_text = FindChildTextInner(child, kChildSearchDepth); |
| 177 TrimWhitespace(element_text, TRIM_ALL, &element_text); | 194 TrimWhitespace(node_text, TRIM_ALL, &node_text); |
| 178 return element_text; | 195 return node_text; |
| 179 } | 196 } |
| 180 | 197 |
| 181 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 198 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 182 // a previous sibling of |element|. | 199 // a previous sibling of |element|, |
| 200 // e.g. Some Text <input ...> |
| 201 // or Some <span>Text</span> <input ...> |
| 202 // or <p>Some Text</p><input ...> |
| 203 // or <label>Some Text</label> <input ...> |
| 204 // or Some Text <img><input ...> |
| 205 // or <b>Some Text</b><br/> <input ...>. |
| 183 string16 InferLabelFromPrevious(const WebFormControlElement& element) { | 206 string16 InferLabelFromPrevious(const WebFormControlElement& element) { |
| 184 string16 inferred_label; | 207 string16 inferred_label; |
| 185 WebNode previous = element.previousSibling(); | 208 WebNode previous = element; |
| 186 if (previous.isNull()) | 209 while (true) { |
| 187 return string16(); | 210 previous = previous.previousSibling(); |
| 211 if (previous.isNull()) |
| 212 break; |
| 188 | 213 |
| 189 // Check for text immediately before the |element|. | 214 // Skip over comments. |
| 190 if (previous.isTextNode()) { | 215 WebNode::NodeType node_type = previous.nodeType(); |
| 191 inferred_label = previous.nodeValue(); | 216 if (node_type == WebNode::CommentNode) |
| 192 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | 217 continue; |
| 218 |
| 219 // Otherwise, only consider normal HTML elements and their contents. |
| 220 if (node_type != WebNode::TextNode && |
| 221 node_type != WebNode::ElementNode) |
| 222 break; |
| 223 |
| 224 // A label might be split across multiple "lightweight" nodes. |
| 225 // Coalesce any text contained in multiple consecutive |
| 226 // (a) plain text nodes or |
| 227 // (b) inline HTML elements that are essentially equivalent to text nodes. |
| 228 if (previous.isTextNode() || |
| 229 HasTagName(previous, "b") || HasTagName(previous, "strong") || |
| 230 HasTagName(previous, "span") || HasTagName(previous, "font")) { |
| 231 string16 value = FindChildText(previous); |
| 232 // A text node's value will be empty if it is for a line break. |
| 233 bool add_space = previous.isTextNode() && value.empty(); |
| 234 inferred_label = |
| 235 CombineAndCollapseWhitespace(value, inferred_label, add_space); |
| 236 continue; |
| 237 } |
| 238 |
| 239 // If we have identified a partial label and have reached a non-lightweight |
| 240 // element, consider the label to be complete. |
| 241 string16 trimmed_label; |
| 242 TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label); |
| 243 if (!trimmed_label.empty()) |
| 244 break; |
| 245 |
| 246 // <img> and <br> tags often appear between the input element and its |
| 247 // label text, so skip over them. |
| 248 if (HasTagName(previous, "img") || HasTagName(previous, "br")) |
| 249 continue; |
| 250 |
| 251 // We only expect <p> and <label> tags to contain the full label text. |
| 252 if (HasTagName(previous, "p") || HasTagName(previous, "label")) |
| 253 inferred_label = FindChildText(previous); |
| 254 |
| 255 break; |
| 193 } | 256 } |
| 194 | 257 |
| 195 // If we didn't find text, check for an immediately preceding text container, | 258 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); |
| 196 // e.g. <p>Some Text</p><input ...> | |
| 197 // Note the lack of whitespace between <p> and <input> elements. | |
| 198 if (inferred_label.empty() && previous.isElementNode()) { | |
| 199 WebElement previous_element = previous.to<WebElement>(); | |
| 200 if (IsTextContainerElement(previous_element)) | |
| 201 inferred_label = FindChildText(previous_element); | |
| 202 } | |
| 203 | |
| 204 // If we didn't find one immediately preceding, check for a text container | |
| 205 // separated from this node only by whitespace, | |
| 206 // e.g. <p>Some Text</p> <input ...> | |
| 207 // Note the whitespace between <p> and <input> elements. | |
| 208 if (inferred_label.empty() && previous.isTextNode()) { | |
| 209 WebNode sibling = previous.previousSibling(); | |
| 210 if (!sibling.isNull() && sibling.isElementNode()) { | |
| 211 WebElement previous_element = sibling.to<WebElement>(); | |
| 212 if (IsTextContainerElement(previous_element)) | |
| 213 inferred_label = FindChildText(previous_element); | |
| 214 } | |
| 215 } | |
| 216 | |
| 217 // Look for a text node prior to <img> or <br> tags, | |
| 218 // e.g. Some Text<img/><input ...> or Some Text<br/><input ...> | |
| 219 while (inferred_label.empty() && !previous.isNull()) { | |
| 220 if (previous.isTextNode()) { | |
| 221 inferred_label = previous.nodeValue(); | |
| 222 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | |
| 223 } else if (previous.isElementNode()) { | |
| 224 WebElement previous_element = previous.to<WebElement>(); | |
| 225 if (IsTextContainerElement(previous_element)) | |
| 226 inferred_label = FindChildText(previous_element); | |
| 227 else if (!HasTagName(previous, "img") && !HasTagName(previous, "br")) | |
| 228 break; | |
| 229 } else { | |
| 230 break; | |
| 231 } | |
| 232 | |
| 233 previous = previous.previousSibling(); | |
| 234 } | |
| 235 | |
| 236 // Look for a label node prior to the <input> tag, | |
| 237 // e.g. <label>Some Text</label><input ...> | |
| 238 while (inferred_label.empty() && !previous.isNull()) { | |
| 239 if (previous.isTextNode()) { | |
| 240 inferred_label = previous.nodeValue(); | |
| 241 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | |
| 242 } else if (HasTagName(previous, "label")) { | |
| 243 inferred_label = FindChildText(previous.to<WebElement>()); | |
| 244 } else { | |
| 245 break; | |
| 246 } | |
| 247 | |
| 248 previous = previous.previousSibling(); | |
| 249 } | |
| 250 | |
| 251 return inferred_label; | 259 return inferred_label; |
| 252 } | 260 } |
| 253 | 261 |
| 254 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 262 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 255 // enclosing list item, | 263 // enclosing list item, |
| 256 // e.g. <li>Some Text<input ...><input ...><input ...></li> | 264 // e.g. <li>Some Text<input ...><input ...><input ...></li> |
| 257 string16 InferLabelFromListItem(const WebFormControlElement& element) { | 265 string16 InferLabelFromListItem(const WebFormControlElement& element) { |
| 258 WebNode parent = element.parentNode(); | 266 WebNode parent = element.parentNode(); |
| 259 while (!parent.isNull() && parent.isElementNode() && | 267 while (!parent.isNull() && parent.isElementNode() && |
| 260 !parent.to<WebElement>().hasTagName("li")) { | 268 !parent.to<WebElement>().hasTagName("li")) { |
| 261 parent = parent.parentNode(); | 269 parent = parent.parentNode(); |
| 262 } | 270 } |
| 263 | 271 |
| 264 if (!parent.isNull() && HasTagName(parent, "li")) | 272 if (!parent.isNull() && HasTagName(parent, "li")) |
| 265 return FindChildText(parent.to<WebElement>()); | 273 return FindChildText(parent); |
| 266 | 274 |
| 267 return string16(); | 275 return string16(); |
| 268 } | 276 } |
| 269 | 277 |
| 270 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 278 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 271 // surrounding table structure, | 279 // surrounding table structure, |
| 272 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> | 280 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> |
| 273 // or <tr><th>Some Text</th><td><input ...></td></tr> | 281 // or <tr><th>Some Text</th><td><input ...></td></tr> |
| 274 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> | 282 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> |
| 275 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> | 283 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> |
| 276 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { | 284 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { |
| 277 WebNode parent = element.parentNode(); | 285 WebNode parent = element.parentNode(); |
| 278 while (!parent.isNull() && parent.isElementNode() && | 286 while (!parent.isNull() && parent.isElementNode() && |
| 279 !parent.to<WebElement>().hasTagName("td")) { | 287 !parent.to<WebElement>().hasTagName("td")) { |
| 280 parent = parent.parentNode(); | 288 parent = parent.parentNode(); |
| 281 } | 289 } |
| 282 | 290 |
| 283 if (parent.isNull()) | 291 if (parent.isNull()) |
| 284 return string16(); | 292 return string16(); |
| 285 | 293 |
| 286 // Check all previous siblings, skipping non-element nodes, until we find a | 294 // Check all previous siblings, skipping non-element nodes, until we find a |
| 287 // non-empty text block. | 295 // non-empty text block. |
| 288 string16 inferred_label; | 296 string16 inferred_label; |
| 289 WebNode previous = parent.previousSibling(); | 297 WebNode previous = parent.previousSibling(); |
| 290 while (inferred_label.empty() && !previous.isNull()) { | 298 while (inferred_label.empty() && !previous.isNull()) { |
| 291 if (HasTagName(previous, "td") || HasTagName(previous, "th")) | 299 if (HasTagName(previous, "td") || HasTagName(previous, "th")) |
| 292 inferred_label = FindChildText(previous.to<WebElement>()); | 300 inferred_label = FindChildText(previous); |
| 293 | 301 |
| 294 previous = previous.previousSibling(); | 302 previous = previous.previousSibling(); |
| 295 } | 303 } |
| 296 | 304 |
| 297 return inferred_label; | 305 return inferred_label; |
| 298 } | 306 } |
| 299 | 307 |
| 300 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 308 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 301 // surrounding table structure, | 309 // surrounding table structure, |
| 302 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> | 310 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> |
| 303 string16 InferLabelFromTableRow(const WebFormControlElement& element) { | 311 string16 InferLabelFromTableRow(const WebFormControlElement& element) { |
| 304 WebNode parent = element.parentNode(); | 312 WebNode parent = element.parentNode(); |
| 305 while (!parent.isNull() && parent.isElementNode() && | 313 while (!parent.isNull() && parent.isElementNode() && |
| 306 !parent.to<WebElement>().hasTagName("tr")) { | 314 !parent.to<WebElement>().hasTagName("tr")) { |
| 307 parent = parent.parentNode(); | 315 parent = parent.parentNode(); |
| 308 } | 316 } |
| 309 | 317 |
| 310 if (parent.isNull()) | 318 if (parent.isNull()) |
| 311 return string16(); | 319 return string16(); |
| 312 | 320 |
| 313 // Check all previous siblings, skipping non-element nodes, until we find a | 321 // Check all previous siblings, skipping non-element nodes, until we find a |
| 314 // non-empty text block. | 322 // non-empty text block. |
| 315 string16 inferred_label; | 323 string16 inferred_label; |
| 316 WebNode previous = parent.previousSibling(); | 324 WebNode previous = parent.previousSibling(); |
| 317 while (inferred_label.empty() && !previous.isNull()) { | 325 while (inferred_label.empty() && !previous.isNull()) { |
| 318 if (HasTagName(previous, "tr")) | 326 if (HasTagName(previous, "tr")) |
| 319 inferred_label = FindChildText(previous.to<WebElement>()); | 327 inferred_label = FindChildText(previous); |
| 320 | 328 |
| 321 previous = previous.previousSibling(); | 329 previous = previous.previousSibling(); |
| 322 } | 330 } |
| 323 | 331 |
| 324 return inferred_label; | 332 return inferred_label; |
| 325 } | 333 } |
| 326 | 334 |
| 327 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 335 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 328 // a surrounding div table, | 336 // a surrounding div table, |
| 329 // e.g. <div>Some Text<span><input ...></span></div> | 337 // e.g. <div>Some Text<span><input ...></span></div> |
| 330 // e.g. <div>Some Text</div><div><input ...></div> | 338 // e.g. <div>Some Text</div><div><input ...></div> |
| 331 string16 InferLabelFromDivTable(const WebFormControlElement& element) { | 339 string16 InferLabelFromDivTable(const WebFormControlElement& element) { |
| 332 WebNode node = element.parentNode(); | 340 WebNode node = element.parentNode(); |
| 333 while (!node.isNull() && node.isElementNode() && | 341 bool looking_for_parent = true; |
| 334 !node.to<WebElement>().hasTagName("div") && | |
| 335 // If the element is in a table, its label most likely is too. | |
| 336 !node.to<WebElement>().hasTagName("table")) { | |
| 337 node = node.parentNode(); | |
| 338 } | |
| 339 | 342 |
| 340 if (node.isNull() || !HasTagName(node, "div")) | 343 // Search the sibling and parent <div>s until we find a candidate label. |
| 341 return string16(); | |
| 342 | |
| 343 // Search the siblings while we cannot find label. | |
| 344 string16 inferred_label; | 344 string16 inferred_label; |
| 345 while (inferred_label.empty() && !node.isNull()) { | 345 while (inferred_label.empty() && !node.isNull()) { |
| 346 if (HasTagName(node, "div")) | 346 if (HasTagName(node, "div")) { |
| 347 inferred_label = FindChildText(node.to<WebElement>()); | 347 looking_for_parent = false; |
| 348 inferred_label = FindChildText(node); |
| 349 } else if (looking_for_parent && |
| 350 (HasTagName(node, "table") || HasTagName(node, "fieldset"))) { |
| 351 // If the element is in a table or fieldset, its label most likely is too. |
| 352 break; |
| 353 } |
| 348 | 354 |
| 349 node = node.previousSibling(); | 355 if (node.previousSibling().isNull()) { |
| 356 // If there are no more siblings, continue walking up the tree. |
| 357 looking_for_parent = true; |
| 358 } |
| 359 |
| 360 if (looking_for_parent) |
| 361 node = node.parentNode(); |
| 362 else |
| 363 node = node.previousSibling(); |
| 350 } | 364 } |
| 351 | 365 |
| 352 return inferred_label; | 366 return inferred_label; |
| 353 } | 367 } |
| 354 | 368 |
| 355 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 369 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 356 // a surrounding definition list, | 370 // a surrounding definition list, |
| 357 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> | 371 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> |
| 358 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> | 372 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> |
| 359 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { | 373 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { |
| 360 WebNode parent = element.parentNode(); | 374 WebNode parent = element.parentNode(); |
| 361 while (!parent.isNull() && parent.isElementNode() && | 375 while (!parent.isNull() && parent.isElementNode() && |
| 362 !parent.to<WebElement>().hasTagName("dd")) | 376 !parent.to<WebElement>().hasTagName("dd")) |
| 363 parent = parent.parentNode(); | 377 parent = parent.parentNode(); |
| 364 | 378 |
| 365 if (parent.isNull() || !HasTagName(parent, "dd")) | 379 if (parent.isNull() || !HasTagName(parent, "dd")) |
| 366 return string16(); | 380 return string16(); |
| 367 | 381 |
| 368 // Skip by any intervening text nodes. | 382 // Skip by any intervening text nodes. |
| 369 WebNode previous = parent.previousSibling(); | 383 WebNode previous = parent.previousSibling(); |
| 370 while (!previous.isNull() && previous.isTextNode()) | 384 while (!previous.isNull() && previous.isTextNode()) |
| 371 previous = previous.previousSibling(); | 385 previous = previous.previousSibling(); |
| 372 | 386 |
| 373 if (previous.isNull() || !HasTagName(previous, "dt")) | 387 if (previous.isNull() || !HasTagName(previous, "dt")) |
| 374 return string16(); | 388 return string16(); |
| 375 | 389 |
| 376 return FindChildText(previous.to<WebElement>()); | 390 return FindChildText(previous); |
| 377 } | 391 } |
| 378 | 392 |
| 379 // Infers corresponding label for |element| from surrounding context in the DOM, | 393 // Infers corresponding label for |element| from surrounding context in the DOM, |
| 380 // e.g. the contents of the preceding <p> tag or text element. | 394 // e.g. the contents of the preceding <p> tag or text element. |
| 381 string16 InferLabelForElement(const WebFormControlElement& element) { | 395 string16 InferLabelForElement(const WebFormControlElement& element) { |
| 382 string16 inferred_label = InferLabelFromPrevious(element); | 396 string16 inferred_label = InferLabelFromPrevious(element); |
| 383 if (!inferred_label.empty()) | 397 if (!inferred_label.empty()) |
| 384 return inferred_label; | 398 return inferred_label; |
| 385 | 399 |
| 386 // If we didn't find a label, check for list item case. | 400 // If we didn't find a label, check for list item case. |
| (...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 638 | 652 |
| 639 // static | 653 // static |
| 640 string16 FormManager::LabelForElement(const WebFormControlElement& element) { | 654 string16 FormManager::LabelForElement(const WebFormControlElement& element) { |
| 641 // Don't scrape labels for elements we can't possibly autofill anyway. | 655 // Don't scrape labels for elements we can't possibly autofill anyway. |
| 642 if (!IsAutofillableElement(element)) | 656 if (!IsAutofillableElement(element)) |
| 643 return string16(); | 657 return string16(); |
| 644 | 658 |
| 645 WebNodeList labels = element.document().getElementsByTagName("label"); | 659 WebNodeList labels = element.document().getElementsByTagName("label"); |
| 646 for (unsigned i = 0; i < labels.length(); ++i) { | 660 for (unsigned i = 0; i < labels.length(); ++i) { |
| 647 WebLabelElement label = labels.item(i).to<WebLabelElement>(); | 661 WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
| 648 DCHECK(label.hasTagName("label")); | 662 WebElement corresponding_control = label.correspondingControl(); |
| 649 if (label.correspondingControl() == element) | 663 if (corresponding_control == element || |
| 664 (corresponding_control.isNull() && |
| 665 label.getAttribute("for") == element.nameForAutofill())) { |
| 650 return FindChildText(label); | 666 return FindChildText(label); |
| 667 } |
| 651 } | 668 } |
| 652 | 669 |
| 653 // Infer the label from context if not found in label element. | 670 // Infer the label from context if not found in label element. |
| 654 return InferLabelForElement(element); | 671 return InferLabelForElement(element); |
| 655 } | 672 } |
| 656 | 673 |
| 657 // static | 674 // static |
| 658 bool FormManager::WebFormElementToFormData(const WebFormElement& element, | 675 bool FormManager::WebFormElementToFormData(const WebFormElement& element, |
| 659 RequirementsMask requirements, | 676 RequirementsMask requirements, |
| 660 ExtractMask extract_mask, | 677 ExtractMask extract_mask, |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 725 // Loop through the label elements inside the form element. For each label | 742 // Loop through the label elements inside the form element. For each label |
| 726 // element, get the corresponding form control element, use the form control | 743 // element, get the corresponding form control element, use the form control |
| 727 // element's name as a key into the <name, FormField> map to find the | 744 // element's name as a key into the <name, FormField> map to find the |
| 728 // previously created FormField and set the FormField's label to the | 745 // previously created FormField and set the FormField's label to the |
| 729 // text collected from the label element by FindChildText(). | 746 // text collected from the label element by FindChildText(). |
| 730 WebNodeList labels = element.getElementsByTagName("label"); | 747 WebNodeList labels = element.getElementsByTagName("label"); |
| 731 for (unsigned i = 0; i < labels.length(); ++i) { | 748 for (unsigned i = 0; i < labels.length(); ++i) { |
| 732 WebLabelElement label = labels.item(i).to<WebLabelElement>(); | 749 WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
| 733 WebFormControlElement field_element = | 750 WebFormControlElement field_element = |
| 734 label.correspondingControl().to<WebFormControlElement>(); | 751 label.correspondingControl().to<WebFormControlElement>(); |
| 735 if (field_element.isNull() || | 752 |
| 753 string16 element_name; |
| 754 if (field_element.isNull()) { |
| 755 // Sometimes site authors will incorrectly specify the corresponding |
| 756 // field element's name rather than its id, so we compensate here. |
| 757 element_name = label.getAttribute("for"); |
| 758 } else if ( |
| 736 !field_element.isFormControlElement() || | 759 !field_element.isFormControlElement() || |
| 737 field_element.formControlType() == WebString::fromUTF8("hidden")) | 760 field_element.formControlType() == WebString::fromUTF8("hidden")) { |
| 738 continue; | 761 continue; |
| 762 } else { |
| 763 element_name = field_element.nameForAutofill(); |
| 764 } |
| 739 | 765 |
| 740 std::map<string16, FormField*>::iterator iter = | 766 std::map<string16, FormField*>::iterator iter = name_map.find(element_name); |
| 741 name_map.find(field_element.nameForAutofill()); | |
| 742 // Concatenate labels because some sites might have multiple label | 767 // Concatenate labels because some sites might have multiple label |
| 743 // candidates. | 768 // candidates. |
| 744 if (iter != name_map.end()) | 769 if (iter != name_map.end()) |
| 745 iter->second->label += FindChildText(label); | 770 iter->second->label += FindChildText(label); |
| 746 } | 771 } |
| 747 | 772 |
| 748 // Loop through the form control elements, extracting the label text from | 773 // Loop through the form control elements, extracting the label text from |
| 749 // the DOM. We use the |fields_extracted| vector to make sure we assign the | 774 // the DOM. We use the |fields_extracted| vector to make sure we assign the |
| 750 // extracted label to the correct field, as it's possible |form_fields| will | 775 // extracted label to the correct field, as it's possible |form_fields| will |
| 751 // not contain all of the elements in |control_elements|. | 776 // not contain all of the elements in |control_elements|. |
| (...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1069 if (element_name == form.name && action == form.action) { | 1094 if (element_name == form.name && action == form.action) { |
| 1070 *form_element = *form_iter; | 1095 *form_element = *form_iter; |
| 1071 return true; | 1096 return true; |
| 1072 } | 1097 } |
| 1073 } | 1098 } |
| 1074 | 1099 |
| 1075 return false; | 1100 return false; |
| 1076 } | 1101 } |
| 1077 | 1102 |
| 1078 } // namespace autofill | 1103 } // namespace autofill |
| OLD | NEW |