OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/autofill/form_manager.h" | 5 #include "chrome/renderer/autofill/form_manager.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/memory/scoped_vector.h" | 8 #include "base/memory/scoped_vector.h" |
9 #include "base/stl_util.h" | 9 #include "base/stl_util.h" |
10 #include "base/string_util.h" | 10 #include "base/string_util.h" |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
71 if (!element) | 71 if (!element) |
72 return false; | 72 return false; |
73 | 73 |
74 return element->isTextField() && !element->isPasswordField(); | 74 return element->isTextField() && !element->isPasswordField(); |
75 } | 75 } |
76 | 76 |
77 bool IsSelectElement(const WebFormControlElement& element) { | 77 bool IsSelectElement(const WebFormControlElement& element) { |
78 return element.formControlType() == ASCIIToUTF16("select-one"); | 78 return element.formControlType() == ASCIIToUTF16("select-one"); |
79 } | 79 } |
80 | 80 |
81 bool IsTextContainerElement(const WebElement& element) { | |
82 return | |
83 element.hasTagName("p") || | |
84 element.hasTagName("b") || | |
85 element.hasTagName("span") || | |
86 element.hasTagName("font"); | |
87 } | |
88 | |
89 bool IsOptionElement(const WebElement& element) { | 81 bool IsOptionElement(const WebElement& element) { |
90 return element.hasTagName("option"); | 82 return element.hasTagName("option"); |
91 } | 83 } |
92 | 84 |
93 bool IsScriptElement(const WebElement& element) { | 85 bool IsScriptElement(const WebElement& element) { |
94 return element.hasTagName("script"); | 86 return element.hasTagName("script"); |
95 } | 87 } |
96 | 88 |
97 bool IsNoScriptElement(const WebElement& element) { | 89 bool IsNoScriptElement(const WebElement& element) { |
98 return element.hasTagName("noscript"); | 90 return element.hasTagName("noscript"); |
99 } | 91 } |
100 | 92 |
101 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { | 93 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { |
102 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); | 94 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); |
103 } | 95 } |
104 | 96 |
105 bool IsAutofillableElement(const WebFormControlElement& element) { | 97 bool IsAutofillableElement(const WebFormControlElement& element) { |
106 const WebInputElement* input_element = toWebInputElement(&element); | 98 const WebInputElement* input_element = toWebInputElement(&element); |
107 return IsTextInput(input_element) || IsSelectElement(element); | 99 return IsTextInput(input_element) || IsSelectElement(element); |
108 } | 100 } |
109 | 101 |
102 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed | |
103 // to a single space. If |force_whitespace| is true, then the resulting string | |
104 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the | |
105 // result includes a space only if |prefix| has trailing whitespace or |suffix| | |
106 // has leading whitespace. | |
dhollowa
2011/08/01 22:16:54
Some examples in this comment would help the casua
Ilya Sherman
2011/08/01 23:59:36
Done.
| |
107 const string16 CombineAndCollapseWhitespace(const string16& prefix, | |
108 const string16& suffix, | |
109 bool force_whitespace) { | |
110 string16 prefix_trimmed; | |
111 TrimPositions prefix_trailing_whitespace = | |
112 TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed); | |
113 | |
114 // Trim leading whitespace from |suffix|. | |
115 string16 suffix_trimmed; | |
116 TrimPositions suffix_leading_whitespace = | |
117 TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed); | |
118 | |
119 if (prefix_trailing_whitespace || suffix_leading_whitespace || | |
120 force_whitespace) { | |
121 return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed; | |
122 } else { | |
123 return prefix_trimmed + suffix_trimmed; | |
124 } | |
125 } | |
126 | |
110 // This is a helper function for the FindChildText() function (see below). | 127 // This is a helper function for the FindChildText() function (see below). |
111 // Search depth is limited with the |depth| parameter. | 128 // Search depth is limited with the |depth| parameter. |
112 string16 FindChildTextInner(const WebNode& node, int depth) { | 129 string16 FindChildTextInner(const WebNode& node, int depth) { |
113 if (depth <= 0 || node.isNull()) | 130 if (depth <= 0 || node.isNull()) |
114 return string16(); | 131 return string16(); |
115 | 132 |
116 // Skip over comments. | 133 // Skip over comments. |
117 if (node.nodeType() == WebNode::CommentNode) | 134 if (node.nodeType() == WebNode::CommentNode) |
118 return FindChildTextInner(node.nextSibling(), depth - 1); | 135 return FindChildTextInner(node.nextSibling(), depth - 1); |
119 | 136 |
120 if (node.nodeType() != WebNode::ElementNode && | 137 if (node.nodeType() != WebNode::ElementNode && |
121 node.nodeType() != WebNode::TextNode) | 138 node.nodeType() != WebNode::TextNode) |
122 return string16(); | 139 return string16(); |
123 | 140 |
124 // Ignore elements known not to contain inferable labels. | 141 // Ignore elements known not to contain inferable labels. |
125 if (node.isElementNode()) { | 142 if (node.isElementNode()) { |
126 const WebElement element = node.toConst<WebElement>(); | 143 const WebElement element = node.toConst<WebElement>(); |
127 if (IsOptionElement(element) || | 144 if (IsOptionElement(element) || |
128 IsScriptElement(element) || | 145 IsScriptElement(element) || |
129 IsNoScriptElement(element)) { | 146 IsNoScriptElement(element) || |
147 (element.isFormControlElement() && | |
148 IsAutofillableElement(element.toConst<WebFormControlElement>()))) { | |
130 return string16(); | 149 return string16(); |
131 } | 150 } |
132 } | 151 } |
133 | 152 |
134 // Extract the text exactly at this node. | 153 // Extract the text exactly at this node. |
135 string16 node_text = node.nodeValue(); | 154 string16 node_text = node.nodeValue(); |
136 TrimPositions node_trailing_whitespace = | |
137 TrimWhitespace(node_text, TRIM_TRAILING, &node_text); | |
138 | 155 |
139 // Recursively compute the children's text. | 156 // Recursively compute the children's text. |
140 // Preserve inter-element whitespace separation. | 157 // Preserve inter-element whitespace separation. |
141 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); | 158 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); |
142 TrimPositions child_leading_whitespace = | 159 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
143 TrimWhitespace(child_text, TRIM_LEADING, &child_text); | 160 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); |
144 if (node_trailing_whitespace || child_leading_whitespace || | |
145 (node.nodeType() == WebNode::TextNode && node_text.empty())) { | |
146 node_text += ASCIIToUTF16(" "); | |
147 } | |
148 node_text += child_text; | |
149 node_trailing_whitespace = | |
150 TrimWhitespace(node_text, TRIM_TRAILING, &node_text); | |
151 | 161 |
152 // Recursively compute the siblings' text. | 162 // Recursively compute the siblings' text. |
153 // Again, preserve inter-element whitespace separation. | 163 // Again, preserve inter-element whitespace separation. |
154 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); | 164 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); |
155 TrimPositions sibling_leading_whitespace = | 165 add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
156 TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text); | 166 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); |
157 if (node_trailing_whitespace || sibling_leading_whitespace || | |
158 (node.nodeType() == WebNode::TextNode && node_text.empty())) { | |
159 node_text += ASCIIToUTF16(" "); | |
160 } | |
161 node_text += sibling_text; | |
162 | 167 |
163 return node_text; | 168 return node_text; |
164 } | 169 } |
165 | 170 |
166 // Returns the aggregated values of the descendants of |element| that are | 171 // Returns the aggregated values of the descendants of |node| that are |
167 // non-empty text nodes. This is a faster alternative to |innerText()| for | 172 // non-empty text nodes. This is a faster alternative to |innerText()| for |
168 // performance critical operations. It does a full depth-first search so can be | 173 // performance critical operations. It does a full depth-first search so can be |
169 // used when the structure is not directly known. However, unlike with | 174 // used when the structure is not directly known. However, unlike with |
170 // |innerText()|, the search depth and breadth are limited to a fixed threshold. | 175 // |innerText()|, the search depth and breadth are limited to a fixed threshold. |
171 // Whitespace is trimmed from text accumulated at descendant nodes. | 176 // Whitespace is trimmed from text accumulated at descendant nodes. |
172 string16 FindChildText(const WebElement& element) { | 177 string16 FindChildText(const WebNode& node) { |
173 WebNode child = element.firstChild(); | 178 if (node.isTextNode()) |
179 return node.nodeValue(); | |
180 | |
181 WebNode child = node.firstChild(); | |
174 | 182 |
175 const int kChildSearchDepth = 10; | 183 const int kChildSearchDepth = 10; |
176 string16 element_text = FindChildTextInner(child, kChildSearchDepth); | 184 string16 node_text = FindChildTextInner(child, kChildSearchDepth); |
177 TrimWhitespace(element_text, TRIM_ALL, &element_text); | 185 TrimWhitespace(node_text, TRIM_ALL, &node_text); |
178 return element_text; | 186 return node_text; |
179 } | 187 } |
180 | 188 |
181 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 189 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
182 // a previous sibling of |element|. | 190 // a previous sibling of |element|, |
191 // e.g. Some Text <input ...> | |
192 // or Some <span>Text</span> <input ...> | |
193 // or <p>Some Text</p><input ...> | |
194 // or <label>Some Text</label> <input ...> | |
195 // or Some Text <img><input ...> | |
196 // or <b>Some Text</b><br/> <input ...>. | |
183 string16 InferLabelFromPrevious(const WebFormControlElement& element) { | 197 string16 InferLabelFromPrevious(const WebFormControlElement& element) { |
184 string16 inferred_label; | 198 string16 inferred_label; |
185 WebNode previous = element.previousSibling(); | 199 WebNode previous = element; |
186 if (previous.isNull()) | 200 while (true) { |
187 return string16(); | 201 previous = previous.previousSibling(); |
202 if (previous.isNull()) | |
203 break; | |
188 | 204 |
189 // Check for text immediately before the |element|. | 205 WebNode::NodeType node_type = previous.nodeType(); |
190 if (previous.isTextNode()) { | 206 if (node_type != WebNode::TextNode && |
191 inferred_label = previous.nodeValue(); | 207 node_type != WebNode::ElementNode && |
192 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | 208 node_type != WebNode::CommentNode) |
193 } | 209 break; |
194 | 210 |
195 // If we didn't find text, check for an immediately preceding text container, | 211 // Coalesce any text contained in multiple consecutive plain text nodes or |
196 // e.g. <p>Some Text</p><input ...> | 212 // one of a few HTML elements that are essentially equivalent to text nodes. |
197 // Note the lack of whitespace between <p> and <input> elements. | 213 if (previous.isTextNode() || |
198 if (inferred_label.empty() && previous.isElementNode()) { | 214 HasTagName(previous, "b") || HasTagName(previous, "strong") || |
199 WebElement previous_element = previous.to<WebElement>(); | 215 HasTagName(previous, "span") || HasTagName(previous, "font")) { |
200 if (IsTextContainerElement(previous_element)) | 216 string16 value = FindChildText(previous); |
201 inferred_label = FindChildText(previous_element); | 217 // A text node's value will be empty if it is for a line break. |
202 } | 218 bool add_space = previous.isTextNode() && value.empty(); |
219 inferred_label = | |
220 CombineAndCollapseWhitespace(value, inferred_label, add_space); | |
221 } else if (previous.isElementNode()) { | |
dhollowa
2011/08/01 22:16:54
Instead of "else if", it would be more readable IM
Ilya Sherman
2011/08/01 23:59:36
Done.
| |
222 // All other elements are only allowed if we have not yet found any | |
223 // candidate label text. | |
224 string16 trimmed_label; | |
225 TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label); | |
226 if (trimmed_label.empty()) { | |
227 // <img> and <br> tags often appear between the input element and its | |
228 // label text, so skip over them. | |
229 if (HasTagName(previous, "img") || HasTagName(previous, "br")) | |
230 continue; | |
203 | 231 |
204 // If we didn't find one immediately preceding, check for a text container | 232 // We expect <p> and <label> tags to contain the full label text, so |
205 // separated from this node only by whitespace, | 233 // only allow these if we have not yet found any candidate label text. |
206 // e.g. <p>Some Text</p> <input ...> | 234 if (HasTagName(previous, "p") || HasTagName(previous, "label")) |
207 // Note the whitespace between <p> and <input> elements. | 235 inferred_label = FindChildText(previous); |
208 if (inferred_label.empty() && previous.isTextNode()) { | 236 } |
209 WebNode sibling = previous.previousSibling(); | 237 |
210 if (!sibling.isNull() && sibling.isElementNode()) { | 238 break; |
211 WebElement previous_element = sibling.to<WebElement>(); | |
212 if (IsTextContainerElement(previous_element)) | |
213 inferred_label = FindChildText(previous_element); | |
214 } | 239 } |
215 } | 240 } |
216 | 241 |
217 // Look for a text node prior to <img> or <br> tags, | 242 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); |
218 // e.g. Some Text<img/><input ...> or Some Text<br/><input ...> | |
219 while (inferred_label.empty() && !previous.isNull()) { | |
220 if (previous.isTextNode()) { | |
221 inferred_label = previous.nodeValue(); | |
222 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | |
223 } else if (previous.isElementNode()) { | |
224 WebElement previous_element = previous.to<WebElement>(); | |
225 if (IsTextContainerElement(previous_element)) | |
226 inferred_label = FindChildText(previous_element); | |
227 else if (!HasTagName(previous, "img") && !HasTagName(previous, "br")) | |
228 break; | |
229 } else { | |
230 break; | |
231 } | |
232 | |
233 previous = previous.previousSibling(); | |
234 } | |
235 | |
236 // Look for a label node prior to the <input> tag, | |
237 // e.g. <label>Some Text</label><input ...> | |
238 while (inferred_label.empty() && !previous.isNull()) { | |
239 if (previous.isTextNode()) { | |
240 inferred_label = previous.nodeValue(); | |
241 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | |
242 } else if (HasTagName(previous, "label")) { | |
243 inferred_label = FindChildText(previous.to<WebElement>()); | |
244 } else { | |
245 break; | |
246 } | |
247 | |
248 previous = previous.previousSibling(); | |
249 } | |
250 | |
251 return inferred_label; | 243 return inferred_label; |
252 } | 244 } |
253 | 245 |
254 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 246 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
255 // enclosing list item, | 247 // enclosing list item, |
256 // e.g. <li>Some Text<input ...><input ...><input ...></tr> | 248 // e.g. <li>Some Text<input ...><input ...><input ...></li> |
257 string16 InferLabelFromListItem(const WebFormControlElement& element) { | 249 string16 InferLabelFromListItem(const WebFormControlElement& element) { |
258 WebNode parent = element.parentNode(); | 250 WebNode parent = element.parentNode(); |
259 while (!parent.isNull() && parent.isElementNode() && | 251 while (!parent.isNull() && parent.isElementNode() && |
260 !parent.to<WebElement>().hasTagName("li")) { | 252 !parent.to<WebElement>().hasTagName("li")) { |
261 parent = parent.parentNode(); | 253 parent = parent.parentNode(); |
262 } | 254 } |
263 | 255 |
264 if (!parent.isNull() && HasTagName(parent, "li")) | 256 if (!parent.isNull() && HasTagName(parent, "li")) |
265 return FindChildText(parent.to<WebElement>()); | 257 return FindChildText(parent); |
266 | 258 |
267 return string16(); | 259 return string16(); |
268 } | 260 } |
269 | 261 |
270 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 262 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
271 // surrounding table structure, | 263 // surrounding table structure, |
272 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> | 264 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> |
273 // or <tr><th>Some Text</th><td><input ...></td></tr> | 265 // or <tr><th>Some Text</th><td><input ...></td></tr> |
274 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> | 266 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> |
275 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> | 267 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> |
276 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { | 268 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { |
277 WebNode parent = element.parentNode(); | 269 WebNode parent = element.parentNode(); |
278 while (!parent.isNull() && parent.isElementNode() && | 270 while (!parent.isNull() && parent.isElementNode() && |
279 !parent.to<WebElement>().hasTagName("td")) { | 271 !parent.to<WebElement>().hasTagName("td")) { |
280 parent = parent.parentNode(); | 272 parent = parent.parentNode(); |
281 } | 273 } |
282 | 274 |
283 if (parent.isNull()) | 275 if (parent.isNull()) |
284 return string16(); | 276 return string16(); |
285 | 277 |
286 // Check all previous siblings, skipping non-element nodes, until we find a | 278 // Check all previous siblings, skipping non-element nodes, until we find a |
287 // non-empty text block. | 279 // non-empty text block. |
288 string16 inferred_label; | 280 string16 inferred_label; |
289 WebNode previous = parent.previousSibling(); | 281 WebNode previous = parent.previousSibling(); |
290 while (inferred_label.empty() && !previous.isNull()) { | 282 while (inferred_label.empty() && !previous.isNull()) { |
291 if (HasTagName(previous, "td") || HasTagName(previous, "th")) | 283 if (HasTagName(previous, "td") || HasTagName(previous, "th")) |
292 inferred_label = FindChildText(previous.to<WebElement>()); | 284 inferred_label = FindChildText(previous); |
293 | 285 |
294 previous = previous.previousSibling(); | 286 previous = previous.previousSibling(); |
295 } | 287 } |
296 | 288 |
297 return inferred_label; | 289 return inferred_label; |
298 } | 290 } |
299 | 291 |
300 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 292 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
301 // surrounding table structure, | 293 // surrounding table structure, |
302 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> | 294 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> |
303 string16 InferLabelFromTableRow(const WebFormControlElement& element) { | 295 string16 InferLabelFromTableRow(const WebFormControlElement& element) { |
304 WebNode parent = element.parentNode(); | 296 WebNode parent = element.parentNode(); |
305 while (!parent.isNull() && parent.isElementNode() && | 297 while (!parent.isNull() && parent.isElementNode() && |
306 !parent.to<WebElement>().hasTagName("tr")) { | 298 !parent.to<WebElement>().hasTagName("tr")) { |
307 parent = parent.parentNode(); | 299 parent = parent.parentNode(); |
308 } | 300 } |
309 | 301 |
310 if (parent.isNull()) | 302 if (parent.isNull()) |
311 return string16(); | 303 return string16(); |
312 | 304 |
313 // Check all previous siblings, skipping non-element nodes, until we find a | 305 // Check all previous siblings, skipping non-element nodes, until we find a |
314 // non-empty text block. | 306 // non-empty text block. |
315 string16 inferred_label; | 307 string16 inferred_label; |
316 WebNode previous = parent.previousSibling(); | 308 WebNode previous = parent.previousSibling(); |
317 while (inferred_label.empty() && !previous.isNull()) { | 309 while (inferred_label.empty() && !previous.isNull()) { |
318 if (HasTagName(previous, "tr")) | 310 if (HasTagName(previous, "tr")) |
319 inferred_label = FindChildText(previous.to<WebElement>()); | 311 inferred_label = FindChildText(previous); |
320 | 312 |
321 previous = previous.previousSibling(); | 313 previous = previous.previousSibling(); |
322 } | 314 } |
323 | 315 |
324 return inferred_label; | 316 return inferred_label; |
325 } | 317 } |
326 | 318 |
327 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 319 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
328 // a surrounding div table, | 320 // a surrounding div table, |
329 // e.g. <div>Some Text<span><input ...></span></div> | 321 // e.g. <div>Some Text<span><input ...></span></div> |
330 // e.g. <div>Some Text</div><div><input ...></div> | 322 // e.g. <div>Some Text</div><div><input ...></div> |
331 string16 InferLabelFromDivTable(const WebFormControlElement& element) { | 323 string16 InferLabelFromDivTable(const WebFormControlElement& element) { |
332 WebNode node = element.parentNode(); | 324 WebNode node = element.parentNode(); |
333 while (!node.isNull() && node.isElementNode() && | 325 bool looking_for_parent = true; |
334 !node.to<WebElement>().hasTagName("div") && | |
335 // If the element is in a table, its label most likely is too. | |
336 !node.to<WebElement>().hasTagName("table")) { | |
337 node = node.parentNode(); | |
338 } | |
339 | 326 |
340 if (node.isNull() || !HasTagName(node, "div")) | 327 // Search the sibling and parent <div>s until we find a candidate label. |
341 return string16(); | |
342 | |
343 // Search the siblings while we cannot find label. | |
344 string16 inferred_label; | 328 string16 inferred_label; |
345 while (inferred_label.empty() && !node.isNull()) { | 329 while (inferred_label.empty() && !node.isNull()) { |
346 if (HasTagName(node, "div")) | 330 if (HasTagName(node, "div")) { |
347 inferred_label = FindChildText(node.to<WebElement>()); | 331 looking_for_parent = false; |
332 inferred_label = FindChildText(node); | |
333 } else if (looking_for_parent && | |
334 (HasTagName(node, "table") || HasTagName(node, "fieldset"))) { | |
335 // If the element is in a table or fieldset, its label most likely is too. | |
336 break; | |
337 } | |
348 | 338 |
349 node = node.previousSibling(); | 339 if (node.previousSibling().isNull()) { |
340 // If there are no more siblings, continue walking up the tree. | |
341 looking_for_parent = true; | |
342 } | |
343 | |
344 if (looking_for_parent) | |
345 node = node.parentNode(); | |
346 else | |
347 node = node.previousSibling(); | |
350 } | 348 } |
351 | 349 |
352 return inferred_label; | 350 return inferred_label; |
353 } | 351 } |
354 | 352 |
355 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 353 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
356 // a surrounding definition list, | 354 // a surrounding definition list, |
357 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> | 355 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> |
358 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> | 356 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> |
359 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { | 357 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { |
360 WebNode parent = element.parentNode(); | 358 WebNode parent = element.parentNode(); |
361 while (!parent.isNull() && parent.isElementNode() && | 359 while (!parent.isNull() && parent.isElementNode() && |
362 !parent.to<WebElement>().hasTagName("dd")) | 360 !parent.to<WebElement>().hasTagName("dd")) |
363 parent = parent.parentNode(); | 361 parent = parent.parentNode(); |
364 | 362 |
365 if (parent.isNull() || !HasTagName(parent, "dd")) | 363 if (parent.isNull() || !HasTagName(parent, "dd")) |
366 return string16(); | 364 return string16(); |
367 | 365 |
368 // Skip by any intervening text nodes. | 366 // Skip by any intervening text nodes. |
369 WebNode previous = parent.previousSibling(); | 367 WebNode previous = parent.previousSibling(); |
370 while (!previous.isNull() && previous.isTextNode()) | 368 while (!previous.isNull() && previous.isTextNode()) |
371 previous = previous.previousSibling(); | 369 previous = previous.previousSibling(); |
372 | 370 |
373 if (previous.isNull() || !HasTagName(previous, "dt")) | 371 if (previous.isNull() || !HasTagName(previous, "dt")) |
374 return string16(); | 372 return string16(); |
375 | 373 |
376 return FindChildText(previous.to<WebElement>()); | 374 return FindChildText(previous); |
377 } | 375 } |
378 | 376 |
379 // Infers corresponding label for |element| from surrounding context in the DOM, | 377 // Infers corresponding label for |element| from surrounding context in the DOM, |
380 // e.g. the contents of the preceding <p> tag or text element. | 378 // e.g. the contents of the preceding <p> tag or text element. |
381 string16 InferLabelForElement(const WebFormControlElement& element) { | 379 string16 InferLabelForElement(const WebFormControlElement& element) { |
382 string16 inferred_label = InferLabelFromPrevious(element); | 380 string16 inferred_label = InferLabelFromPrevious(element); |
383 if (!inferred_label.empty()) | 381 if (!inferred_label.empty()) |
384 return inferred_label; | 382 return inferred_label; |
385 | 383 |
386 // If we didn't find a label, check for list item case. | 384 // If we didn't find a label, check for list item case. |
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
638 | 636 |
639 // static | 637 // static |
640 string16 FormManager::LabelForElement(const WebFormControlElement& element) { | 638 string16 FormManager::LabelForElement(const WebFormControlElement& element) { |
641 // Don't scrape labels for elements we can't possibly autofill anyway. | 639 // Don't scrape labels for elements we can't possibly autofill anyway. |
642 if (!IsAutofillableElement(element)) | 640 if (!IsAutofillableElement(element)) |
643 return string16(); | 641 return string16(); |
644 | 642 |
645 WebNodeList labels = element.document().getElementsByTagName("label"); | 643 WebNodeList labels = element.document().getElementsByTagName("label"); |
646 for (unsigned i = 0; i < labels.length(); ++i) { | 644 for (unsigned i = 0; i < labels.length(); ++i) { |
647 WebLabelElement label = labels.item(i).to<WebLabelElement>(); | 645 WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
648 DCHECK(label.hasTagName("label")); | 646 WebElement corresponding_control = label.correspondingControl(); |
649 if (label.correspondingControl() == element) | 647 if (corresponding_control == element || |
648 (corresponding_control.isNull() && | |
649 label.getAttribute("for") == element.nameForAutofill())) { | |
650 return FindChildText(label); | 650 return FindChildText(label); |
651 } | |
651 } | 652 } |
652 | 653 |
653 // Infer the label from context if not found in label element. | 654 // Infer the label from context if not found in label element. |
654 return InferLabelForElement(element); | 655 return InferLabelForElement(element); |
655 } | 656 } |
656 | 657 |
657 // static | 658 // static |
658 bool FormManager::WebFormElementToFormData(const WebFormElement& element, | 659 bool FormManager::WebFormElementToFormData(const WebFormElement& element, |
659 RequirementsMask requirements, | 660 RequirementsMask requirements, |
660 ExtractMask extract_mask, | 661 ExtractMask extract_mask, |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
725 // Loop through the label elements inside the form element. For each label | 726 // Loop through the label elements inside the form element. For each label |
726 // element, get the corresponding form control element, use the form control | 727 // element, get the corresponding form control element, use the form control |
727 // element's name as a key into the <name, FormField> map to find the | 728 // element's name as a key into the <name, FormField> map to find the |
728 // previously created FormField and set the FormField's label to the | 729 // previously created FormField and set the FormField's label to the |
729 // label.firstChild().nodeValue() of the label element. | 730 // label.firstChild().nodeValue() of the label element. |
730 WebNodeList labels = element.getElementsByTagName("label"); | 731 WebNodeList labels = element.getElementsByTagName("label"); |
731 for (unsigned i = 0; i < labels.length(); ++i) { | 732 for (unsigned i = 0; i < labels.length(); ++i) { |
732 WebLabelElement label = labels.item(i).to<WebLabelElement>(); | 733 WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
733 WebFormControlElement field_element = | 734 WebFormControlElement field_element = |
734 label.correspondingControl().to<WebFormControlElement>(); | 735 label.correspondingControl().to<WebFormControlElement>(); |
735 if (field_element.isNull() || | 736 |
737 string16 element_name; | |
738 if (field_element.isNull()) { | |
739 // Sometimes site authors will incorrectly specify the corresponding | |
740 // field element's name rather than its id, so we compensate here. | |
741 element_name = label.getAttribute("for"); | |
742 } else if ( | |
736 !field_element.isFormControlElement() || | 743 !field_element.isFormControlElement() || |
737 field_element.formControlType() == WebString::fromUTF8("hidden")) | 744 field_element.formControlType() == WebString::fromUTF8("hidden")) { |
738 continue; | 745 continue; |
746 } else { | |
747 element_name = field_element.nameForAutofill(); | |
748 } | |
739 | 749 |
740 std::map<string16, FormField*>::iterator iter = | 750 std::map<string16, FormField*>::iterator iter = name_map.find(element_name); |
741 name_map.find(field_element.nameForAutofill()); | |
742 // Concatenate labels because some sites might have multiple label | 751 // Concatenate labels because some sites might have multiple label |
743 // candidates. | 752 // candidates. |
744 if (iter != name_map.end()) | 753 if (iter != name_map.end()) |
745 iter->second->label += FindChildText(label); | 754 iter->second->label += FindChildText(label); |
746 } | 755 } |
747 | 756 |
748 // Loop through the form control elements, extracting the label text from | 757 // Loop through the form control elements, extracting the label text from |
749 // the DOM. We use the |fields_extracted| vector to make sure we assign the | 758 // the DOM. We use the |fields_extracted| vector to make sure we assign the |
750 // extracted label to the correct field, as it's possible |form_fields| will | 759 // extracted label to the correct field, as it's possible |form_fields| will |
751 // not contain all of the elements in |control_elements|. | 760 // not contain all of the elements in |control_elements|. |
(...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1069 if (element_name == form.name && action == form.action) { | 1078 if (element_name == form.name && action == form.action) { |
1070 *form_element = *form_iter; | 1079 *form_element = *form_iter; |
1071 return true; | 1080 return true; |
1072 } | 1081 } |
1073 } | 1082 } |
1074 | 1083 |
1075 return false; | 1084 return false; |
1076 } | 1085 } |
1077 | 1086 |
1078 } // namespace autofill | 1087 } // namespace autofill |
OLD | NEW |