Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/autofill/form_manager.h" | 5 #include "chrome/renderer/autofill/form_manager.h" |
| 6 | 6 |
| 7 #include "base/logging.h" | 7 #include "base/logging.h" |
| 8 #include "base/memory/scoped_vector.h" | 8 #include "base/memory/scoped_vector.h" |
| 9 #include "base/stl_util.h" | 9 #include "base/stl_util.h" |
| 10 #include "base/string_util.h" | 10 #include "base/string_util.h" |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 71 if (!element) | 71 if (!element) |
| 72 return false; | 72 return false; |
| 73 | 73 |
| 74 return element->isTextField() && !element->isPasswordField(); | 74 return element->isTextField() && !element->isPasswordField(); |
| 75 } | 75 } |
| 76 | 76 |
| 77 bool IsSelectElement(const WebFormControlElement& element) { | 77 bool IsSelectElement(const WebFormControlElement& element) { |
| 78 return element.formControlType() == ASCIIToUTF16("select-one"); | 78 return element.formControlType() == ASCIIToUTF16("select-one"); |
| 79 } | 79 } |
| 80 | 80 |
| 81 bool IsTextContainerElement(const WebElement& element) { | |
| 82 return | |
| 83 element.hasTagName("p") || | |
| 84 element.hasTagName("b") || | |
| 85 element.hasTagName("span") || | |
| 86 element.hasTagName("font"); | |
| 87 } | |
| 88 | |
| 89 bool IsOptionElement(const WebElement& element) { | 81 bool IsOptionElement(const WebElement& element) { |
| 90 return element.hasTagName("option"); | 82 return element.hasTagName("option"); |
| 91 } | 83 } |
| 92 | 84 |
| 93 bool IsScriptElement(const WebElement& element) { | 85 bool IsScriptElement(const WebElement& element) { |
| 94 return element.hasTagName("script"); | 86 return element.hasTagName("script"); |
| 95 } | 87 } |
| 96 | 88 |
| 97 bool IsNoScriptElement(const WebElement& element) { | 89 bool IsNoScriptElement(const WebElement& element) { |
| 98 return element.hasTagName("noscript"); | 90 return element.hasTagName("noscript"); |
| 99 } | 91 } |
| 100 | 92 |
| 101 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { | 93 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { |
| 102 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); | 94 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); |
| 103 } | 95 } |
| 104 | 96 |
| 105 bool IsAutofillableElement(const WebFormControlElement& element) { | 97 bool IsAutofillableElement(const WebFormControlElement& element) { |
| 106 const WebInputElement* input_element = toWebInputElement(&element); | 98 const WebInputElement* input_element = toWebInputElement(&element); |
| 107 return IsTextInput(input_element) || IsSelectElement(element); | 99 return IsTextInput(input_element) || IsSelectElement(element); |
| 108 } | 100 } |
| 109 | 101 |
| 102 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed | |
| 103 // to a single space. If |force_whitespace| is true, then the resulting string | |
| 104 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the | |
| 105 // result includes a space only if |prefix| has trailing whitespace or |suffix| | |
| 106 // has leading whitespace. | |
|
dhollowa
2011/08/01 22:16:54
Some examples in this comment would help the casua
Ilya Sherman
2011/08/01 23:59:36
Done.
| |
| 107 const string16 CombineAndCollapseWhitespace(const string16& prefix, | |
| 108 const string16& suffix, | |
| 109 bool force_whitespace) { | |
| 110 string16 prefix_trimmed; | |
| 111 TrimPositions prefix_trailing_whitespace = | |
| 112 TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed); | |
| 113 | |
| 114 // Trim any leading whitespace from the suffix. | |
| 115 string16 suffix_trimmed; | |
| 116 TrimPositions suffix_leading_whitespace = | |
| 117 TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed); | |
| 118 | |
| 119 if (prefix_trailing_whitespace || suffix_leading_whitespace || | |
| 120 force_whitespace) { | |
| 121 return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed; | |
| 122 } else { | |
| 123 return prefix_trimmed + suffix_trimmed; | |
| 124 } | |
| 125 } | |
| 126 | |
| 110 // This is a helper function for the FindChildText() function (see below). | 127 // This is a helper function for the FindChildText() function (see below). |
| 111 // Search depth is limited with the |depth| parameter. | 128 // Search depth is limited with the |depth| parameter. |
| 112 string16 FindChildTextInner(const WebNode& node, int depth) { | 129 string16 FindChildTextInner(const WebNode& node, int depth) { |
| 113 if (depth <= 0 || node.isNull()) | 130 if (depth <= 0 || node.isNull()) |
| 114 return string16(); | 131 return string16(); |
| 115 | 132 |
| 116 // Skip over comments. | 133 // Skip over comments. |
| 117 if (node.nodeType() == WebNode::CommentNode) | 134 if (node.nodeType() == WebNode::CommentNode) |
| 118 return FindChildTextInner(node.nextSibling(), depth - 1); | 135 return FindChildTextInner(node.nextSibling(), depth - 1); |
| 119 | 136 |
| 120 if (node.nodeType() != WebNode::ElementNode && | 137 if (node.nodeType() != WebNode::ElementNode && |
| 121 node.nodeType() != WebNode::TextNode) | 138 node.nodeType() != WebNode::TextNode) |
| 122 return string16(); | 139 return string16(); |
| 123 | 140 |
| 124 // Ignore elements known not to contain inferable labels. | 141 // Ignore elements known not to contain inferable labels. |
| 125 if (node.isElementNode()) { | 142 if (node.isElementNode()) { |
| 126 const WebElement element = node.toConst<WebElement>(); | 143 const WebElement element = node.toConst<WebElement>(); |
| 127 if (IsOptionElement(element) || | 144 if (IsOptionElement(element) || |
| 128 IsScriptElement(element) || | 145 IsScriptElement(element) || |
| 129 IsNoScriptElement(element)) { | 146 IsNoScriptElement(element) || |
| 147 (element.isFormControlElement() && | |
| 148 IsAutofillableElement(element.toConst<WebFormControlElement>()))) { | |
| 130 return string16(); | 149 return string16(); |
| 131 } | 150 } |
| 132 } | 151 } |
| 133 | 152 |
| 134 // Extract the text exactly at this node. | 153 // Extract the text exactly at this node. |
| 135 string16 node_text = node.nodeValue(); | 154 string16 node_text = node.nodeValue(); |
| 136 TrimPositions node_trailing_whitespace = | |
| 137 TrimWhitespace(node_text, TRIM_TRAILING, &node_text); | |
| 138 | 155 |
| 139 // Recursively compute the children's text. | 156 // Recursively compute the children's text. |
| 140 // Preserve inter-element whitespace separation. | 157 // Preserve inter-element whitespace separation. |
| 141 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); | 158 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); |
| 142 TrimPositions child_leading_whitespace = | 159 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
| 143 TrimWhitespace(child_text, TRIM_LEADING, &child_text); | 160 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); |
| 144 if (node_trailing_whitespace || child_leading_whitespace || | |
| 145 (node.nodeType() == WebNode::TextNode && node_text.empty())) { | |
| 146 node_text += ASCIIToUTF16(" "); | |
| 147 } | |
| 148 node_text += child_text; | |
| 149 node_trailing_whitespace = | |
| 150 TrimWhitespace(node_text, TRIM_TRAILING, &node_text); | |
| 151 | 161 |
| 152 // Recursively compute the siblings' text. | 162 // Recursively compute the siblings' text. |
| 153 // Again, preserve inter-element whitespace separation. | 163 // Again, preserve inter-element whitespace separation. |
| 154 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); | 164 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); |
| 155 TrimPositions sibling_leading_whitespace = | 165 add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
| 156 TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text); | 166 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); |
| 157 if (node_trailing_whitespace || sibling_leading_whitespace || | |
| 158 (node.nodeType() == WebNode::TextNode && node_text.empty())) { | |
| 159 node_text += ASCIIToUTF16(" "); | |
| 160 } | |
| 161 node_text += sibling_text; | |
| 162 | 167 |
| 163 return node_text; | 168 return node_text; |
| 164 } | 169 } |
| 165 | 170 |
| 166 // Returns the aggregated values of the descendants of |element| that are | 171 // Returns the aggregated values of the descendants of |element| that are |
| 167 // non-empty text nodes. This is a faster alternative to |innerText()| for | 172 // non-empty text nodes. This is a faster alternative to |innerText()| for |
| 168 // performance critical operations. It does a full depth-first search so can be | 173 // performance critical operations. It does a full depth-first search so can be |
| 169 // used when the structure is not directly known. However, unlike with | 174 // used when the structure is not directly known. However, unlike with |
| 170 // |innerText()|, the search depth and breadth are limited to a fixed threshold. | 175 // |innerText()|, the search depth and breadth are limited to a fixed threshold. |
| 171 // Whitespace is trimmed from text accumulated at descendant nodes. | 176 // Whitespace is trimmed from text accumulated at descendant nodes. |
| 172 string16 FindChildText(const WebElement& element) { | 177 string16 FindChildText(const WebNode& node) { |
| 173 WebNode child = element.firstChild(); | 178 if (node.isTextNode()) |
| 179 return node.nodeValue(); | |
| 180 | |
| 181 WebNode child = node.firstChild(); | |
| 174 | 182 |
| 175 const int kChildSearchDepth = 10; | 183 const int kChildSearchDepth = 10; |
| 176 string16 element_text = FindChildTextInner(child, kChildSearchDepth); | 184 string16 node_text = FindChildTextInner(child, kChildSearchDepth); |
| 177 TrimWhitespace(element_text, TRIM_ALL, &element_text); | 185 TrimWhitespace(node_text, TRIM_ALL, &node_text); |
| 178 return element_text; | 186 return node_text; |
| 179 } | 187 } |
| 180 | 188 |
| 181 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 189 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 182 // a previous sibling of |element|. | 190 // a previous sibling of |element|, |
| 191 // e.g. Some Text <input ...> | |
| 192 // or Some <span>Text</span> <input ...> | |
| 193 // or <p>Some Text</p><input ...> | |
| 194 // or <label>Some Text</label> <input ...> | |
| 195 // or Some Text <img><input ...> | |
| 196 // or <b>Some Text</b><br/> <input ...>. | |
| 183 string16 InferLabelFromPrevious(const WebFormControlElement& element) { | 197 string16 InferLabelFromPrevious(const WebFormControlElement& element) { |
| 184 string16 inferred_label; | 198 string16 inferred_label; |
| 185 WebNode previous = element.previousSibling(); | 199 WebNode previous = element; |
| 186 if (previous.isNull()) | 200 while (true) { |
| 187 return string16(); | 201 previous = previous.previousSibling(); |
| 202 if (previous.isNull()) | |
| 203 break; | |
| 188 | 204 |
| 189 // Check for text immediately before the |element|. | 205 WebNode::NodeType node_type = previous.nodeType(); |
| 190 if (previous.isTextNode()) { | 206 if (node_type != WebNode::TextNode && |
| 191 inferred_label = previous.nodeValue(); | 207 node_type != WebNode::ElementNode && |
| 192 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | 208 node_type != WebNode::CommentNode) |
| 193 } | 209 break; |
| 194 | 210 |
| 195 // If we didn't find text, check for an immediately preceding text container, | 211 // Coalesce any text contained in multiple consecutive plain text nodes or |
| 196 // e.g. <p>Some Text</p><input ...> | 212 // one of a few HTML elements that are essentially equivalent to text nodes. |
| 197 // Note the lack of whitespace between <p> and <input> elements. | 213 if (previous.isTextNode() || |
| 198 if (inferred_label.empty() && previous.isElementNode()) { | 214 HasTagName(previous, "b") || HasTagName(previous, "strong") || |
| 199 WebElement previous_element = previous.to<WebElement>(); | 215 HasTagName(previous, "span") || HasTagName(previous, "font")) { |
| 200 if (IsTextContainerElement(previous_element)) | 216 string16 value = FindChildText(previous); |
| 201 inferred_label = FindChildText(previous_element); | 217 // A text node's value will be empty if it is for a line break. |
| 202 } | 218 bool add_space = previous.isTextNode() && value.empty(); |
| 219 inferred_label = | |
| 220 CombineAndCollapseWhitespace(value, inferred_label, add_space); | |
| 221 } else if (previous.isElementNode()) { | |
|
dhollowa
2011/08/01 22:16:54
Instead of "else if", it would be more readable IM
Ilya Sherman
2011/08/01 23:59:36
Done.
| |
| 222 // All other elements are only allowed if we have not yet found any | |
| 223 // candidate label text. | |
| 224 string16 trimmed_label; | |
| 225 TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label); | |
| 226 if (trimmed_label.empty()) { | |
| 227 // <img> and <br> tags often appear between the input element and its | |
| 228 // label text, so skip over them. | |
| 229 if (HasTagName(previous, "img") || HasTagName(previous, "br")) | |
| 230 continue; | |
| 203 | 231 |
| 204 // If we didn't find one immediately preceding, check for a text container | 232 // We expect <p> and <label> tags to contain the full label text, so |
| 205 // separated from this node only by whitespace, | 233 // only allow these if we have not yet found any candidate label text. |
| 206 // e.g. <p>Some Text</p> <input ...> | 234 if (HasTagName(previous, "p") || HasTagName(previous, "label")) |
| 207 // Note the whitespace between <p> and <input> elements. | 235 inferred_label = FindChildText(previous); |
| 208 if (inferred_label.empty() && previous.isTextNode()) { | 236 } |
| 209 WebNode sibling = previous.previousSibling(); | 237 |
| 210 if (!sibling.isNull() && sibling.isElementNode()) { | 238 break; |
| 211 WebElement previous_element = sibling.to<WebElement>(); | |
| 212 if (IsTextContainerElement(previous_element)) | |
| 213 inferred_label = FindChildText(previous_element); | |
| 214 } | 239 } |
| 215 } | 240 } |
| 216 | 241 |
| 217 // Look for a text node prior to <img> or <br> tags, | 242 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); |
| 218 // e.g. Some Text<img/><input ...> or Some Text<br/><input ...> | |
| 219 while (inferred_label.empty() && !previous.isNull()) { | |
| 220 if (previous.isTextNode()) { | |
| 221 inferred_label = previous.nodeValue(); | |
| 222 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | |
| 223 } else if (previous.isElementNode()) { | |
| 224 WebElement previous_element = previous.to<WebElement>(); | |
| 225 if (IsTextContainerElement(previous_element)) | |
| 226 inferred_label = FindChildText(previous_element); | |
| 227 else if (!HasTagName(previous, "img") && !HasTagName(previous, "br")) | |
| 228 break; | |
| 229 } else { | |
| 230 break; | |
| 231 } | |
| 232 | |
| 233 previous = previous.previousSibling(); | |
| 234 } | |
| 235 | |
| 236 // Look for a label node prior to the <input> tag, | |
| 237 // e.g. <label>Some Text</label><input ...> | |
| 238 while (inferred_label.empty() && !previous.isNull()) { | |
| 239 if (previous.isTextNode()) { | |
| 240 inferred_label = previous.nodeValue(); | |
| 241 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | |
| 242 } else if (HasTagName(previous, "label")) { | |
| 243 inferred_label = FindChildText(previous.to<WebElement>()); | |
| 244 } else { | |
| 245 break; | |
| 246 } | |
| 247 | |
| 248 previous = previous.previousSibling(); | |
| 249 } | |
| 250 | |
| 251 return inferred_label; | 243 return inferred_label; |
| 252 } | 244 } |
| 253 | 245 |
| 254 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 246 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 255 // enclosing list item, | 247 // enclosing list item, |
| 256 // e.g. <li>Some Text<input ...><input ...><input ...></li> | 248 // e.g. <li>Some Text<input ...><input ...><input ...></li> |
| 257 string16 InferLabelFromListItem(const WebFormControlElement& element) { | 249 string16 InferLabelFromListItem(const WebFormControlElement& element) { |
| 258 WebNode parent = element.parentNode(); | 250 WebNode parent = element.parentNode(); |
| 259 while (!parent.isNull() && parent.isElementNode() && | 251 while (!parent.isNull() && parent.isElementNode() && |
| 260 !parent.to<WebElement>().hasTagName("li")) { | 252 !parent.to<WebElement>().hasTagName("li")) { |
| 261 parent = parent.parentNode(); | 253 parent = parent.parentNode(); |
| 262 } | 254 } |
| 263 | 255 |
| 264 if (!parent.isNull() && HasTagName(parent, "li")) | 256 if (!parent.isNull() && HasTagName(parent, "li")) |
| 265 return FindChildText(parent.to<WebElement>()); | 257 return FindChildText(parent); |
| 266 | 258 |
| 267 return string16(); | 259 return string16(); |
| 268 } | 260 } |
| 269 | 261 |
| 270 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 262 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 271 // surrounding table structure, | 263 // surrounding table structure, |
| 272 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> | 264 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> |
| 273 // or <tr><th>Some Text</th><td><input ...></td></tr> | 265 // or <tr><th>Some Text</th><td><input ...></td></tr> |
| 274 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> | 266 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> |
| 275 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> | 267 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> |
| 276 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { | 268 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { |
| 277 WebNode parent = element.parentNode(); | 269 WebNode parent = element.parentNode(); |
| 278 while (!parent.isNull() && parent.isElementNode() && | 270 while (!parent.isNull() && parent.isElementNode() && |
| 279 !parent.to<WebElement>().hasTagName("td")) { | 271 !parent.to<WebElement>().hasTagName("td")) { |
| 280 parent = parent.parentNode(); | 272 parent = parent.parentNode(); |
| 281 } | 273 } |
| 282 | 274 |
| 283 if (parent.isNull()) | 275 if (parent.isNull()) |
| 284 return string16(); | 276 return string16(); |
| 285 | 277 |
| 286 // Check all previous siblings, skipping non-element nodes, until we find a | 278 // Check all previous siblings, skipping non-element nodes, until we find a |
| 287 // non-empty text block. | 279 // non-empty text block. |
| 288 string16 inferred_label; | 280 string16 inferred_label; |
| 289 WebNode previous = parent.previousSibling(); | 281 WebNode previous = parent.previousSibling(); |
| 290 while (inferred_label.empty() && !previous.isNull()) { | 282 while (inferred_label.empty() && !previous.isNull()) { |
| 291 if (HasTagName(previous, "td") || HasTagName(previous, "th")) | 283 if (HasTagName(previous, "td") || HasTagName(previous, "th")) |
| 292 inferred_label = FindChildText(previous.to<WebElement>()); | 284 inferred_label = FindChildText(previous); |
| 293 | 285 |
| 294 previous = previous.previousSibling(); | 286 previous = previous.previousSibling(); |
| 295 } | 287 } |
| 296 | 288 |
| 297 return inferred_label; | 289 return inferred_label; |
| 298 } | 290 } |
| 299 | 291 |
| 300 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 292 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 301 // surrounding table structure, | 293 // surrounding table structure, |
| 302 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> | 294 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> |
| 303 string16 InferLabelFromTableRow(const WebFormControlElement& element) { | 295 string16 InferLabelFromTableRow(const WebFormControlElement& element) { |
| 304 WebNode parent = element.parentNode(); | 296 WebNode parent = element.parentNode(); |
| 305 while (!parent.isNull() && parent.isElementNode() && | 297 while (!parent.isNull() && parent.isElementNode() && |
| 306 !parent.to<WebElement>().hasTagName("tr")) { | 298 !parent.to<WebElement>().hasTagName("tr")) { |
| 307 parent = parent.parentNode(); | 299 parent = parent.parentNode(); |
| 308 } | 300 } |
| 309 | 301 |
| 310 if (parent.isNull()) | 302 if (parent.isNull()) |
| 311 return string16(); | 303 return string16(); |
| 312 | 304 |
| 313 // Check all previous siblings, skipping non-element nodes, until we find a | 305 // Check all previous siblings, skipping non-element nodes, until we find a |
| 314 // non-empty text block. | 306 // non-empty text block. |
| 315 string16 inferred_label; | 307 string16 inferred_label; |
| 316 WebNode previous = parent.previousSibling(); | 308 WebNode previous = parent.previousSibling(); |
| 317 while (inferred_label.empty() && !previous.isNull()) { | 309 while (inferred_label.empty() && !previous.isNull()) { |
| 318 if (HasTagName(previous, "tr")) | 310 if (HasTagName(previous, "tr")) |
| 319 inferred_label = FindChildText(previous.to<WebElement>()); | 311 inferred_label = FindChildText(previous); |
| 320 | 312 |
| 321 previous = previous.previousSibling(); | 313 previous = previous.previousSibling(); |
| 322 } | 314 } |
| 323 | 315 |
| 324 return inferred_label; | 316 return inferred_label; |
| 325 } | 317 } |
| 326 | 318 |
| 327 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 319 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 328 // a surrounding div table, | 320 // a surrounding div table, |
| 329 // e.g. <div>Some Text<span><input ...></span></div> | 321 // e.g. <div>Some Text<span><input ...></span></div> |
| 330 // e.g. <div>Some Text</div><div><input ...></div> | 322 // e.g. <div>Some Text</div><div><input ...></div> |
| 331 string16 InferLabelFromDivTable(const WebFormControlElement& element) { | 323 string16 InferLabelFromDivTable(const WebFormControlElement& element) { |
| 332 WebNode node = element.parentNode(); | 324 WebNode node = element.parentNode(); |
| 333 while (!node.isNull() && node.isElementNode() && | 325 bool looking_for_parent = true; |
| 334 !node.to<WebElement>().hasTagName("div") && | |
| 335 // If the element is in a table, its label most likely is too. | |
| 336 !node.to<WebElement>().hasTagName("table")) { | |
| 337 node = node.parentNode(); | |
| 338 } | |
| 339 | 326 |
| 340 if (node.isNull() || !HasTagName(node, "div")) | 327 // Search the sibling and parent <div>s until we find a candidate label. |
| 341 return string16(); | |
| 342 | |
| 343 // Search the siblings while we cannot find label. | |
| 344 string16 inferred_label; | 328 string16 inferred_label; |
| 345 while (inferred_label.empty() && !node.isNull()) { | 329 while (inferred_label.empty() && !node.isNull()) { |
| 346 if (HasTagName(node, "div")) | 330 if (HasTagName(node, "div")) { |
| 347 inferred_label = FindChildText(node.to<WebElement>()); | 331 looking_for_parent = false; |
| 332 inferred_label = FindChildText(node); | |
| 333 } else if (looking_for_parent && | |
| 334 (HasTagName(node, "table") || HasTagName(node, "fieldset"))) { | |
| 335 // If the element is in a table or fieldset, its label most likely is too. | |
| 336 break; | |
| 337 } | |
| 348 | 338 |
| 349 node = node.previousSibling(); | 339 if (node.previousSibling().isNull()) { |
| 340 // If there are no more siblings, continue walking up the tree. | |
| 341 looking_for_parent = true; | |
| 342 } | |
| 343 | |
| 344 if (looking_for_parent) | |
| 345 node = node.parentNode(); | |
| 346 else | |
| 347 node = node.previousSibling(); | |
| 350 } | 348 } |
| 351 | 349 |
| 352 return inferred_label; | 350 return inferred_label; |
| 353 } | 351 } |
| 354 | 352 |
| 355 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 353 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
| 356 // a surrounding definition list, | 354 // a surrounding definition list, |
| 357 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> | 355 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> |
| 358 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> | 356 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> |
| 359 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { | 357 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { |
| 360 WebNode parent = element.parentNode(); | 358 WebNode parent = element.parentNode(); |
| 361 while (!parent.isNull() && parent.isElementNode() && | 359 while (!parent.isNull() && parent.isElementNode() && |
| 362 !parent.to<WebElement>().hasTagName("dd")) | 360 !parent.to<WebElement>().hasTagName("dd")) |
| 363 parent = parent.parentNode(); | 361 parent = parent.parentNode(); |
| 364 | 362 |
| 365 if (parent.isNull() || !HasTagName(parent, "dd")) | 363 if (parent.isNull() || !HasTagName(parent, "dd")) |
| 366 return string16(); | 364 return string16(); |
| 367 | 365 |
| 368 // Skip by any intervening text nodes. | 366 // Skip by any intervening text nodes. |
| 369 WebNode previous = parent.previousSibling(); | 367 WebNode previous = parent.previousSibling(); |
| 370 while (!previous.isNull() && previous.isTextNode()) | 368 while (!previous.isNull() && previous.isTextNode()) |
| 371 previous = previous.previousSibling(); | 369 previous = previous.previousSibling(); |
| 372 | 370 |
| 373 if (previous.isNull() || !HasTagName(previous, "dt")) | 371 if (previous.isNull() || !HasTagName(previous, "dt")) |
| 374 return string16(); | 372 return string16(); |
| 375 | 373 |
| 376 return FindChildText(previous.to<WebElement>()); | 374 return FindChildText(previous); |
| 377 } | 375 } |
| 378 | 376 |
| 379 // Infers corresponding label for |element| from surrounding context in the DOM, | 377 // Infers corresponding label for |element| from surrounding context in the DOM, |
| 380 // e.g. the contents of the preceding <p> tag or text element. | 378 // e.g. the contents of the preceding <p> tag or text element. |
| 381 string16 InferLabelForElement(const WebFormControlElement& element) { | 379 string16 InferLabelForElement(const WebFormControlElement& element) { |
| 382 string16 inferred_label = InferLabelFromPrevious(element); | 380 string16 inferred_label = InferLabelFromPrevious(element); |
| 383 if (!inferred_label.empty()) | 381 if (!inferred_label.empty()) |
| 384 return inferred_label; | 382 return inferred_label; |
| 385 | 383 |
| 386 // If we didn't find a label, check for list item case. | 384 // If we didn't find a label, check for list item case. |
| (...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 638 | 636 |
| 639 // static | 637 // static |
| 640 string16 FormManager::LabelForElement(const WebFormControlElement& element) { | 638 string16 FormManager::LabelForElement(const WebFormControlElement& element) { |
| 641 // Don't scrape labels for elements we can't possibly autofill anyway. | 639 // Don't scrape labels for elements we can't possibly autofill anyway. |
| 642 if (!IsAutofillableElement(element)) | 640 if (!IsAutofillableElement(element)) |
| 643 return string16(); | 641 return string16(); |
| 644 | 642 |
| 645 WebNodeList labels = element.document().getElementsByTagName("label"); | 643 WebNodeList labels = element.document().getElementsByTagName("label"); |
| 646 for (unsigned i = 0; i < labels.length(); ++i) { | 644 for (unsigned i = 0; i < labels.length(); ++i) { |
| 647 WebLabelElement label = labels.item(i).to<WebLabelElement>(); | 645 WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
| 648 DCHECK(label.hasTagName("label")); | 646 WebElement corresponding_control = label.correspondingControl(); |
| 649 if (label.correspondingControl() == element) | 647 if (corresponding_control == element || |
| 648 (corresponding_control.isNull() && | |
| 649 label.getAttribute("for") == element.nameForAutofill())) { | |
| 650 return FindChildText(label); | 650 return FindChildText(label); |
| 651 } | |
| 651 } | 652 } |
| 652 | 653 |
| 653 // Infer the label from context if not found in label element. | 654 // Infer the label from context if not found in label element. |
| 654 return InferLabelForElement(element); | 655 return InferLabelForElement(element); |
| 655 } | 656 } |
| 656 | 657 |
| 657 // static | 658 // static |
| 658 bool FormManager::WebFormElementToFormData(const WebFormElement& element, | 659 bool FormManager::WebFormElementToFormData(const WebFormElement& element, |
| 659 RequirementsMask requirements, | 660 RequirementsMask requirements, |
| 660 ExtractMask extract_mask, | 661 ExtractMask extract_mask, |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 725 // Loop through the label elements inside the form element. For each label | 726 // Loop through the label elements inside the form element. For each label |
| 726 // element, get the corresponding form control element, use the form control | 727 // element, get the corresponding form control element, use the form control |
| 727 // element's name as a key into the <name, FormField> map to find the | 728 // element's name as a key into the <name, FormField> map to find the |
| 728 // previously created FormField and set the FormField's label to the | 729 // previously created FormField and set the FormField's label to the |
| 729 // label.firstChild().nodeValue() of the label element. | 730 // label.firstChild().nodeValue() of the label element. |
| 730 WebNodeList labels = element.getElementsByTagName("label"); | 731 WebNodeList labels = element.getElementsByTagName("label"); |
| 731 for (unsigned i = 0; i < labels.length(); ++i) { | 732 for (unsigned i = 0; i < labels.length(); ++i) { |
| 732 WebLabelElement label = labels.item(i).to<WebLabelElement>(); | 733 WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
| 733 WebFormControlElement field_element = | 734 WebFormControlElement field_element = |
| 734 label.correspondingControl().to<WebFormControlElement>(); | 735 label.correspondingControl().to<WebFormControlElement>(); |
| 735 if (field_element.isNull() || | 736 |
| 737 string16 element_name; | |
| 738 if (field_element.isNull()) { | |
| 739 // Sometimes site authors will incorrectly specify the corresponding | |
| 740 // field element's name rather than its id, so we compensate here. | |
| 741 element_name = label.getAttribute("for"); | |
| 742 } else if ( | |
| 736 !field_element.isFormControlElement() || | 743 !field_element.isFormControlElement() || |
| 737 field_element.formControlType() == WebString::fromUTF8("hidden")) | 744 field_element.formControlType() == WebString::fromUTF8("hidden")) { |
| 738 continue; | 745 continue; |
| 746 } else { | |
| 747 element_name = field_element.nameForAutofill(); | |
| 748 } | |
| 739 | 749 |
| 740 std::map<string16, FormField*>::iterator iter = | 750 std::map<string16, FormField*>::iterator iter = name_map.find(element_name); |
| 741 name_map.find(field_element.nameForAutofill()); | |
| 742 // Concatenate labels because some sites might have multiple label | 751 // Concatenate labels because some sites might have multiple label |
| 743 // candidates. | 752 // candidates. |
| 744 if (iter != name_map.end()) | 753 if (iter != name_map.end()) |
| 745 iter->second->label += FindChildText(label); | 754 iter->second->label += FindChildText(label); |
| 746 } | 755 } |
| 747 | 756 |
| 748 // Loop through the form control elements, extracting the label text from | 757 // Loop through the form control elements, extracting the label text from |
| 749 // the DOM. We use the |fields_extracted| vector to make sure we assign the | 758 // the DOM. We use the |fields_extracted| vector to make sure we assign the |
| 750 // extracted label to the correct field, as it's possible |form_fields| will | 759 // extracted label to the correct field, as it's possible |form_fields| will |
| 751 // not contain all of the elements in |control_elements|. | 760 // not contain all of the elements in |control_elements|. |
| (...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1069 if (element_name == form.name && action == form.action) { | 1078 if (element_name == form.name && action == form.action) { |
| 1070 *form_element = *form_iter; | 1079 *form_element = *form_iter; |
| 1071 return true; | 1080 return true; |
| 1072 } | 1081 } |
| 1073 } | 1082 } |
| 1074 | 1083 |
| 1075 return false; | 1084 return false; |
| 1076 } | 1085 } |
| 1077 | 1086 |
| 1078 } // namespace autofill | 1087 } // namespace autofill |
| OLD | NEW |