Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(341)

Side by Side Diff: chrome/renderer/autofill/form_manager.cc

Issue 7531023: Improve Autofill heuristics when detecting labels from previous elements. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address review comments Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/autofill/form_manager.h" 5 #include "chrome/renderer/autofill/form_manager.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/memory/scoped_vector.h" 8 #include "base/memory/scoped_vector.h"
9 #include "base/stl_util.h" 9 #include "base/stl_util.h"
10 #include "base/string_util.h" 10 #include "base/string_util.h"
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
71 if (!element) 71 if (!element)
72 return false; 72 return false;
73 73
74 return element->isTextField() && !element->isPasswordField(); 74 return element->isTextField() && !element->isPasswordField();
75 } 75 }
76 76
77 bool IsSelectElement(const WebFormControlElement& element) { 77 bool IsSelectElement(const WebFormControlElement& element) {
78 return element.formControlType() == ASCIIToUTF16("select-one"); 78 return element.formControlType() == ASCIIToUTF16("select-one");
79 } 79 }
80 80
81 bool IsTextContainerElement(const WebElement& element) {
82 return
83 element.hasTagName("p") ||
84 element.hasTagName("b") ||
85 element.hasTagName("span") ||
86 element.hasTagName("font");
87 }
88
89 bool IsOptionElement(const WebElement& element) { 81 bool IsOptionElement(const WebElement& element) {
90 return element.hasTagName("option"); 82 return element.hasTagName("option");
91 } 83 }
92 84
93 bool IsScriptElement(const WebElement& element) { 85 bool IsScriptElement(const WebElement& element) {
94 return element.hasTagName("script"); 86 return element.hasTagName("script");
95 } 87 }
96 88
97 bool IsNoScriptElement(const WebElement& element) { 89 bool IsNoScriptElement(const WebElement& element) {
98 return element.hasTagName("noscript"); 90 return element.hasTagName("noscript");
99 } 91 }
100 92
101 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { 93 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) {
102 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); 94 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag);
103 } 95 }
104 96
105 bool IsAutofillableElement(const WebFormControlElement& element) { 97 bool IsAutofillableElement(const WebFormControlElement& element) {
106 const WebInputElement* input_element = toWebInputElement(&element); 98 const WebInputElement* input_element = toWebInputElement(&element);
107 return IsTextInput(input_element) || IsSelectElement(element); 99 return IsTextInput(input_element) || IsSelectElement(element);
108 } 100 }
109 101
102 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
103 // to a single space. If |force_whitespace| is true, then the resulting string
104 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the
105 // result includes a space only if |prefix| has trailing whitespace or |suffix|
106 // has leading whitespace.
107 // A few examples:
108 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar"
109 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar"
110 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar"
111 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar"
112 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar"
113 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar"
114 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar "
115 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar "
116 const string16 CombineAndCollapseWhitespace(const string16& prefix,
117 const string16& suffix,
118 bool force_whitespace) {
119 string16 prefix_trimmed;
120 TrimPositions prefix_trailing_whitespace =
121 TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed);
122
123 // Trim any leading whitespace from |suffix|.
124 string16 suffix_trimmed;
125 TrimPositions suffix_leading_whitespace =
126 TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed);
127
128 if (prefix_trailing_whitespace || suffix_leading_whitespace ||
129 force_whitespace) {
130 return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed;
131 } else {
132 return prefix_trimmed + suffix_trimmed;
133 }
134 }
135
110 // This is a helper function for the FindChildText() function (see below). 136 // This is a helper function for the FindChildText() function (see below).
111 // Search depth is limited with the |depth| parameter. 137 // Search depth is limited with the |depth| parameter.
112 string16 FindChildTextInner(const WebNode& node, int depth) { 138 string16 FindChildTextInner(const WebNode& node, int depth) {
113 if (depth <= 0 || node.isNull()) 139 if (depth <= 0 || node.isNull())
114 return string16(); 140 return string16();
115 141
116 // Skip over comments. 142 // Skip over comments.
117 if (node.nodeType() == WebNode::CommentNode) 143 if (node.nodeType() == WebNode::CommentNode)
118 return FindChildTextInner(node.nextSibling(), depth - 1); 144 return FindChildTextInner(node.nextSibling(), depth - 1);
119 145
120 if (node.nodeType() != WebNode::ElementNode && 146 if (node.nodeType() != WebNode::ElementNode &&
121 node.nodeType() != WebNode::TextNode) 147 node.nodeType() != WebNode::TextNode)
122 return string16(); 148 return string16();
123 149
124 // Ignore elements known not to contain inferable labels. 150 // Ignore elements known not to contain inferable labels.
125 if (node.isElementNode()) { 151 if (node.isElementNode()) {
126 const WebElement element = node.toConst<WebElement>(); 152 const WebElement element = node.toConst<WebElement>();
127 if (IsOptionElement(element) || 153 if (IsOptionElement(element) ||
128 IsScriptElement(element) || 154 IsScriptElement(element) ||
129 IsNoScriptElement(element)) { 155 IsNoScriptElement(element) ||
156 (element.isFormControlElement() &&
157 IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
130 return string16(); 158 return string16();
131 } 159 }
132 } 160 }
133 161
134 // Extract the text exactly at this node. 162 // Extract the text exactly at this node.
135 string16 node_text = node.nodeValue(); 163 string16 node_text = node.nodeValue();
136 TrimPositions node_trailing_whitespace =
137 TrimWhitespace(node_text, TRIM_TRAILING, &node_text);
138 164
139 // Recursively compute the children's text. 165 // Recursively compute the children's text.
140 // Preserve inter-element whitespace separation. 166 // Preserve inter-element whitespace separation.
141 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); 167 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
142 TrimPositions child_leading_whitespace = 168 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
143 TrimWhitespace(child_text, TRIM_LEADING, &child_text); 169 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
144 if (node_trailing_whitespace || child_leading_whitespace ||
145 (node.nodeType() == WebNode::TextNode && node_text.empty())) {
146 node_text += ASCIIToUTF16(" ");
147 }
148 node_text += child_text;
149 node_trailing_whitespace =
150 TrimWhitespace(node_text, TRIM_TRAILING, &node_text);
151 170
152 // Recursively compute the siblings' text. 171 // Recursively compute the siblings' text.
153 // Again, preserve inter-element whitespace separation. 172 // Again, preserve inter-element whitespace separation.
154 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); 173 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1);
155 TrimPositions sibling_leading_whitespace = 174 add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
156 TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text); 175 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
157 if (node_trailing_whitespace || sibling_leading_whitespace ||
158 (node.nodeType() == WebNode::TextNode && node_text.empty())) {
159 node_text += ASCIIToUTF16(" ");
160 }
161 node_text += sibling_text;
162 176
163 return node_text; 177 return node_text;
164 } 178 }
165 179
166 // Returns the aggregated values of the descendants of |element| that are 180 // Returns the aggregated values of the descendants of |element| that are
167 // non-empty text nodes. This is a faster alternative to |innerText()| for 181 // non-empty text nodes. This is a faster alternative to |innerText()| for
168 // performance critical operations. It does a full depth-first search so can be 182 // performance critical operations. It does a full depth-first search so can be
169 // used when the structure is not directly known. However, unlike with 183 // used when the structure is not directly known. However, unlike with
170 // |innerText()|, the search depth and breadth are limited to a fixed threshold. 184 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
171 // Whitespace is trimmed from text accumulated at descendant nodes. 185 // Whitespace is trimmed from text accumulated at descendant nodes.
172 string16 FindChildText(const WebElement& element) { 186 string16 FindChildText(const WebNode& node) {
173 WebNode child = element.firstChild(); 187 if (node.isTextNode())
188 return node.nodeValue();
189
190 WebNode child = node.firstChild();
174 191
175 const int kChildSearchDepth = 10; 192 const int kChildSearchDepth = 10;
176 string16 element_text = FindChildTextInner(child, kChildSearchDepth); 193 string16 node_text = FindChildTextInner(child, kChildSearchDepth);
177 TrimWhitespace(element_text, TRIM_ALL, &element_text); 194 TrimWhitespace(node_text, TRIM_ALL, &node_text);
178 return element_text; 195 return node_text;
179 } 196 }
180 197
181 // Helper for |InferLabelForElement()| that infers a label, if possible, from 198 // Helper for |InferLabelForElement()| that infers a label, if possible, from
182 // a previous sibling of |element|. 199 // a previous sibling of |element|,
200 // e.g. Some Text <input ...>
201 // or Some <span>Text</span> <input ...>
202 // or <p>Some Text</p><input ...>
203 // or <label>Some Text</label> <input ...>
204 // or Some Text <img><input ...>
205 // or <b>Some Text</b><br/> <input ...>.
183 string16 InferLabelFromPrevious(const WebFormControlElement& element) { 206 string16 InferLabelFromPrevious(const WebFormControlElement& element) {
184 string16 inferred_label; 207 string16 inferred_label;
185 WebNode previous = element.previousSibling(); 208 WebNode previous = element;
186 if (previous.isNull()) 209 while (true) {
187 return string16(); 210 previous = previous.previousSibling();
211 if (previous.isNull())
212 break;
188 213
189 // Check for text immediately before the |element|. 214 // Skip over comments.
190 if (previous.isTextNode()) { 215 WebNode::NodeType node_type = previous.nodeType();
191 inferred_label = previous.nodeValue(); 216 if (node_type == WebNode::CommentNode)
192 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); 217 continue;
218
219 // Otherwise, only consider normal HTML elements and their contents.
220 if (node_type != WebNode::TextNode &&
221 node_type != WebNode::ElementNode)
222 break;
223
224 // A label might be split across multiple "lightweight" nodes.
225 // Coalesce any text contained in multiple consecutive
226 // (a) plain text nodes or
227 // (b) inline HTML elements that are essentially equivalent to text nodes.
228 if (previous.isTextNode() ||
229 HasTagName(previous, "b") || HasTagName(previous, "strong") ||
230 HasTagName(previous, "span") || HasTagName(previous, "font")) {
231 string16 value = FindChildText(previous);
232 // A text node's value will be empty if it is for a line break.
233 bool add_space = previous.isTextNode() && value.empty();
234 inferred_label =
235 CombineAndCollapseWhitespace(value, inferred_label, add_space);
236 continue;
237 }
238
239 // If we have identified a partial label and have reached a non-lightweight
240 // element, consider the label to be complete.
241 string16 trimmed_label;
242 TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label);
243 if (!trimmed_label.empty())
244 break;
245
246 // <img> and <br> tags often appear between the input element and its
247 // label text, so skip over them.
248 if (HasTagName(previous, "img") || HasTagName(previous, "br"))
249 continue;
250
251 // We only expect <p> and <label> tags to contain the full label text.
252 if (HasTagName(previous, "p") || HasTagName(previous, "label"))
253 inferred_label = FindChildText(previous);
254
255 break;
193 } 256 }
194 257
195 // If we didn't find text, check for an immediately preceding text container, 258 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
196 // e.g. <p>Some Text</p><input ...>
197 // Note the lack of whitespace between <p> and <input> elements.
198 if (inferred_label.empty() && previous.isElementNode()) {
199 WebElement previous_element = previous.to<WebElement>();
200 if (IsTextContainerElement(previous_element))
201 inferred_label = FindChildText(previous_element);
202 }
203
204 // If we didn't find one immediately preceding, check for a text container
205 // separated from this node only by whitespace,
206 // e.g. <p>Some Text</p> <input ...>
207 // Note the whitespace between <p> and <input> elements.
208 if (inferred_label.empty() && previous.isTextNode()) {
209 WebNode sibling = previous.previousSibling();
210 if (!sibling.isNull() && sibling.isElementNode()) {
211 WebElement previous_element = sibling.to<WebElement>();
212 if (IsTextContainerElement(previous_element))
213 inferred_label = FindChildText(previous_element);
214 }
215 }
216
217 // Look for a text node prior to <img> or <br> tags,
218 // e.g. Some Text<img/><input ...> or Some Text<br/><input ...>
219 while (inferred_label.empty() && !previous.isNull()) {
220 if (previous.isTextNode()) {
221 inferred_label = previous.nodeValue();
222 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
223 } else if (previous.isElementNode()) {
224 WebElement previous_element = previous.to<WebElement>();
225 if (IsTextContainerElement(previous_element))
226 inferred_label = FindChildText(previous_element);
227 else if (!HasTagName(previous, "img") && !HasTagName(previous, "br"))
228 break;
229 } else {
230 break;
231 }
232
233 previous = previous.previousSibling();
234 }
235
236 // Look for a label node prior to the <input> tag,
237 // e.g. <label>Some Text</label><input ...>
238 while (inferred_label.empty() && !previous.isNull()) {
239 if (previous.isTextNode()) {
240 inferred_label = previous.nodeValue();
241 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
242 } else if (HasTagName(previous, "label")) {
243 inferred_label = FindChildText(previous.to<WebElement>());
244 } else {
245 break;
246 }
247
248 previous = previous.previousSibling();
249 }
250
251 return inferred_label; 259 return inferred_label;
252 } 260 }
253 261
254 // Helper for |InferLabelForElement()| that infers a label, if possible, from 262 // Helper for |InferLabelForElement()| that infers a label, if possible, from
255 // enclosing list item, 263 // enclosing list item,
256 // e.g. <li>Some Text<input ...><input ...><input ...></li> 264 // e.g. <li>Some Text<input ...><input ...><input ...></li>
257 string16 InferLabelFromListItem(const WebFormControlElement& element) { 265 string16 InferLabelFromListItem(const WebFormControlElement& element) {
258 WebNode parent = element.parentNode(); 266 WebNode parent = element.parentNode();
259 while (!parent.isNull() && parent.isElementNode() && 267 while (!parent.isNull() && parent.isElementNode() &&
260 !parent.to<WebElement>().hasTagName("li")) { 268 !parent.to<WebElement>().hasTagName("li")) {
261 parent = parent.parentNode(); 269 parent = parent.parentNode();
262 } 270 }
263 271
264 if (!parent.isNull() && HasTagName(parent, "li")) 272 if (!parent.isNull() && HasTagName(parent, "li"))
265 return FindChildText(parent.to<WebElement>()); 273 return FindChildText(parent);
266 274
267 return string16(); 275 return string16();
268 } 276 }
269 277
270 // Helper for |InferLabelForElement()| that infers a label, if possible, from 278 // Helper for |InferLabelForElement()| that infers a label, if possible, from
271 // surrounding table structure, 279 // surrounding table structure,
272 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> 280 // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
273 // or <tr><th>Some Text</th><td><input ...></td></tr> 281 // or <tr><th>Some Text</th><td><input ...></td></tr>
274 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> 282 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
275 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> 283 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
276 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { 284 string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
277 WebNode parent = element.parentNode(); 285 WebNode parent = element.parentNode();
278 while (!parent.isNull() && parent.isElementNode() && 286 while (!parent.isNull() && parent.isElementNode() &&
279 !parent.to<WebElement>().hasTagName("td")) { 287 !parent.to<WebElement>().hasTagName("td")) {
280 parent = parent.parentNode(); 288 parent = parent.parentNode();
281 } 289 }
282 290
283 if (parent.isNull()) 291 if (parent.isNull())
284 return string16(); 292 return string16();
285 293
286 // Check all previous siblings, skipping non-element nodes, until we find a 294 // Check all previous siblings, skipping non-element nodes, until we find a
287 // non-empty text block. 295 // non-empty text block.
288 string16 inferred_label; 296 string16 inferred_label;
289 WebNode previous = parent.previousSibling(); 297 WebNode previous = parent.previousSibling();
290 while (inferred_label.empty() && !previous.isNull()) { 298 while (inferred_label.empty() && !previous.isNull()) {
291 if (HasTagName(previous, "td") || HasTagName(previous, "th")) 299 if (HasTagName(previous, "td") || HasTagName(previous, "th"))
292 inferred_label = FindChildText(previous.to<WebElement>()); 300 inferred_label = FindChildText(previous);
293 301
294 previous = previous.previousSibling(); 302 previous = previous.previousSibling();
295 } 303 }
296 304
297 return inferred_label; 305 return inferred_label;
298 } 306 }
299 307
300 // Helper for |InferLabelForElement()| that infers a label, if possible, from 308 // Helper for |InferLabelForElement()| that infers a label, if possible, from
301 // surrounding table structure, 309 // surrounding table structure,
302 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> 310 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
303 string16 InferLabelFromTableRow(const WebFormControlElement& element) { 311 string16 InferLabelFromTableRow(const WebFormControlElement& element) {
304 WebNode parent = element.parentNode(); 312 WebNode parent = element.parentNode();
305 while (!parent.isNull() && parent.isElementNode() && 313 while (!parent.isNull() && parent.isElementNode() &&
306 !parent.to<WebElement>().hasTagName("tr")) { 314 !parent.to<WebElement>().hasTagName("tr")) {
307 parent = parent.parentNode(); 315 parent = parent.parentNode();
308 } 316 }
309 317
310 if (parent.isNull()) 318 if (parent.isNull())
311 return string16(); 319 return string16();
312 320
313 // Check all previous siblings, skipping non-element nodes, until we find a 321 // Check all previous siblings, skipping non-element nodes, until we find a
314 // non-empty text block. 322 // non-empty text block.
315 string16 inferred_label; 323 string16 inferred_label;
316 WebNode previous = parent.previousSibling(); 324 WebNode previous = parent.previousSibling();
317 while (inferred_label.empty() && !previous.isNull()) { 325 while (inferred_label.empty() && !previous.isNull()) {
318 if (HasTagName(previous, "tr")) 326 if (HasTagName(previous, "tr"))
319 inferred_label = FindChildText(previous.to<WebElement>()); 327 inferred_label = FindChildText(previous);
320 328
321 previous = previous.previousSibling(); 329 previous = previous.previousSibling();
322 } 330 }
323 331
324 return inferred_label; 332 return inferred_label;
325 } 333 }
326 334
327 // Helper for |InferLabelForElement()| that infers a label, if possible, from 335 // Helper for |InferLabelForElement()| that infers a label, if possible, from
328 // a surrounding div table, 336 // a surrounding div table,
329 // e.g. <div>Some Text<span><input ...></span></div> 337 // e.g. <div>Some Text<span><input ...></span></div>
330 // e.g. <div>Some Text</div><div><input ...></div> 338 // e.g. <div>Some Text</div><div><input ...></div>
331 string16 InferLabelFromDivTable(const WebFormControlElement& element) { 339 string16 InferLabelFromDivTable(const WebFormControlElement& element) {
332 WebNode node = element.parentNode(); 340 WebNode node = element.parentNode();
333 while (!node.isNull() && node.isElementNode() && 341 bool looking_for_parent = true;
334 !node.to<WebElement>().hasTagName("div") &&
335 // If the element is in a table, its label most likely is too.
336 !node.to<WebElement>().hasTagName("table")) {
337 node = node.parentNode();
338 }
339 342
340 if (node.isNull() || !HasTagName(node, "div")) 343 // Search the sibling and parent <div>s until we find a candidate label.
341 return string16();
342
343 // Search the siblings while we cannot find label.
344 string16 inferred_label; 344 string16 inferred_label;
345 while (inferred_label.empty() && !node.isNull()) { 345 while (inferred_label.empty() && !node.isNull()) {
346 if (HasTagName(node, "div")) 346 if (HasTagName(node, "div")) {
347 inferred_label = FindChildText(node.to<WebElement>()); 347 looking_for_parent = false;
348 inferred_label = FindChildText(node);
349 } else if (looking_for_parent &&
350 (HasTagName(node, "table") || HasTagName(node, "fieldset"))) {
351 // If the element is in a table or fieldset, its label most likely is too.
352 break;
353 }
348 354
349 node = node.previousSibling(); 355 if (node.previousSibling().isNull()) {
356 // If there are no more siblings, continue walking up the tree.
357 looking_for_parent = true;
358 }
359
360 if (looking_for_parent)
361 node = node.parentNode();
362 else
363 node = node.previousSibling();
350 } 364 }
351 365
352 return inferred_label; 366 return inferred_label;
353 } 367 }
354 368
355 // Helper for |InferLabelForElement()| that infers a label, if possible, from 369 // Helper for |InferLabelForElement()| that infers a label, if possible, from
356 // a surrounding definition list, 370 // a surrounding definition list,
357 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> 371 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
358 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> 372 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
359 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { 373 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) {
360 WebNode parent = element.parentNode(); 374 WebNode parent = element.parentNode();
361 while (!parent.isNull() && parent.isElementNode() && 375 while (!parent.isNull() && parent.isElementNode() &&
362 !parent.to<WebElement>().hasTagName("dd")) 376 !parent.to<WebElement>().hasTagName("dd"))
363 parent = parent.parentNode(); 377 parent = parent.parentNode();
364 378
365 if (parent.isNull() || !HasTagName(parent, "dd")) 379 if (parent.isNull() || !HasTagName(parent, "dd"))
366 return string16(); 380 return string16();
367 381
368 // Skip by any intervening text nodes. 382 // Skip by any intervening text nodes.
369 WebNode previous = parent.previousSibling(); 383 WebNode previous = parent.previousSibling();
370 while (!previous.isNull() && previous.isTextNode()) 384 while (!previous.isNull() && previous.isTextNode())
371 previous = previous.previousSibling(); 385 previous = previous.previousSibling();
372 386
373 if (previous.isNull() || !HasTagName(previous, "dt")) 387 if (previous.isNull() || !HasTagName(previous, "dt"))
374 return string16(); 388 return string16();
375 389
376 return FindChildText(previous.to<WebElement>()); 390 return FindChildText(previous);
377 } 391 }
378 392
379 // Infers corresponding label for |element| from surrounding context in the DOM, 393 // Infers corresponding label for |element| from surrounding context in the DOM,
380 // e.g. the contents of the preceding <p> tag or text element. 394 // e.g. the contents of the preceding <p> tag or text element.
381 string16 InferLabelForElement(const WebFormControlElement& element) { 395 string16 InferLabelForElement(const WebFormControlElement& element) {
382 string16 inferred_label = InferLabelFromPrevious(element); 396 string16 inferred_label = InferLabelFromPrevious(element);
383 if (!inferred_label.empty()) 397 if (!inferred_label.empty())
384 return inferred_label; 398 return inferred_label;
385 399
386 // If we didn't find a label, check for list item case. 400 // If we didn't find a label, check for list item case.
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after
638 652
639 // static 653 // static
640 string16 FormManager::LabelForElement(const WebFormControlElement& element) { 654 string16 FormManager::LabelForElement(const WebFormControlElement& element) {
641 // Don't scrape labels for elements we can't possibly autofill anyway. 655 // Don't scrape labels for elements we can't possibly autofill anyway.
642 if (!IsAutofillableElement(element)) 656 if (!IsAutofillableElement(element))
643 return string16(); 657 return string16();
644 658
645 WebNodeList labels = element.document().getElementsByTagName("label"); 659 WebNodeList labels = element.document().getElementsByTagName("label");
646 for (unsigned i = 0; i < labels.length(); ++i) { 660 for (unsigned i = 0; i < labels.length(); ++i) {
647 WebLabelElement label = labels.item(i).to<WebLabelElement>(); 661 WebLabelElement label = labels.item(i).to<WebLabelElement>();
648 DCHECK(label.hasTagName("label")); 662 WebElement corresponding_control = label.correspondingControl();
649 if (label.correspondingControl() == element) 663 if (corresponding_control == element ||
664 (corresponding_control.isNull() &&
665 label.getAttribute("for") == element.nameForAutofill())) {
650 return FindChildText(label); 666 return FindChildText(label);
667 }
651 } 668 }
652 669
653 // Infer the label from context if not found in label element. 670 // Infer the label from context if not found in label element.
654 return InferLabelForElement(element); 671 return InferLabelForElement(element);
655 } 672 }
656 673
657 // static 674 // static
658 bool FormManager::WebFormElementToFormData(const WebFormElement& element, 675 bool FormManager::WebFormElementToFormData(const WebFormElement& element,
659 RequirementsMask requirements, 676 RequirementsMask requirements,
660 ExtractMask extract_mask, 677 ExtractMask extract_mask,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
725 // Loop through the label elements inside the form element. For each label 742 // Loop through the label elements inside the form element. For each label
726 // element, get the corresponding form control element, use the form control 743 // element, get the corresponding form control element, use the form control
727 // element's name as a key into the <name, FormField> map to find the 744 // element's name as a key into the <name, FormField> map to find the
728 // previously created FormField and set the FormField's label to the 745 // previously created FormField and set the FormField's label to the
729 // text that FindChildText() extracts from the label element. 746 // text that FindChildText() extracts from the label element.
730 WebNodeList labels = element.getElementsByTagName("label"); 747 WebNodeList labels = element.getElementsByTagName("label");
731 for (unsigned i = 0; i < labels.length(); ++i) { 748 for (unsigned i = 0; i < labels.length(); ++i) {
732 WebLabelElement label = labels.item(i).to<WebLabelElement>(); 749 WebLabelElement label = labels.item(i).to<WebLabelElement>();
733 WebFormControlElement field_element = 750 WebFormControlElement field_element =
734 label.correspondingControl().to<WebFormControlElement>(); 751 label.correspondingControl().to<WebFormControlElement>();
735 if (field_element.isNull() || 752
753 string16 element_name;
754 if (field_element.isNull()) {
755 // Sometimes site authors will incorrectly specify the corresponding
756 // field element's name rather than its id, so we compensate here.
757 element_name = label.getAttribute("for");
758 } else if (
736 !field_element.isFormControlElement() || 759 !field_element.isFormControlElement() ||
737 field_element.formControlType() == WebString::fromUTF8("hidden")) 760 field_element.formControlType() == WebString::fromUTF8("hidden")) {
738 continue; 761 continue;
762 } else {
763 element_name = field_element.nameForAutofill();
764 }
739 765
740 std::map<string16, FormField*>::iterator iter = 766 std::map<string16, FormField*>::iterator iter = name_map.find(element_name);
741 name_map.find(field_element.nameForAutofill());
742 // Concatenate labels because some sites might have multiple label 767 // Concatenate labels because some sites might have multiple label
743 // candidates. 768 // candidates.
744 if (iter != name_map.end()) 769 if (iter != name_map.end())
745 iter->second->label += FindChildText(label); 770 iter->second->label += FindChildText(label);
746 } 771 }
747 772
748 // Loop through the form control elements, extracting the label text from 773 // Loop through the form control elements, extracting the label text from
749 // the DOM. We use the |fields_extracted| vector to make sure we assign the 774 // the DOM. We use the |fields_extracted| vector to make sure we assign the
750 // extracted label to the correct field, as it's possible |form_fields| will 775 // extracted label to the correct field, as it's possible |form_fields| will
751 // not contain all of the elements in |control_elements|. 776 // not contain all of the elements in |control_elements|.
(...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after
1069 if (element_name == form.name && action == form.action) { 1094 if (element_name == form.name && action == form.action) {
1070 *form_element = *form_iter; 1095 *form_element = *form_iter;
1071 return true; 1096 return true;
1072 } 1097 }
1073 } 1098 }
1074 1099
1075 return false; 1100 return false;
1076 } 1101 }
1077 1102
1078 } // namespace autofill 1103 } // namespace autofill
OLDNEW
« no previous file with comments | « chrome/browser/autofill/form_structure_browsertest.cc ('k') | chrome/test/data/autofill/heuristics/input/16_crbug_87517.html » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698