OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/autofill/form_manager.h" | 5 #include "chrome/renderer/autofill/form_manager.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/memory/scoped_vector.h" | 8 #include "base/memory/scoped_vector.h" |
9 #include "base/stl_util.h" | 9 #include "base/stl_util.h" |
10 #include "base/string_util.h" | 10 #include "base/string_util.h" |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
71 if (!element) | 71 if (!element) |
72 return false; | 72 return false; |
73 | 73 |
74 return element->isTextField() && !element->isPasswordField(); | 74 return element->isTextField() && !element->isPasswordField(); |
75 } | 75 } |
76 | 76 |
77 bool IsSelectElement(const WebFormControlElement& element) { | 77 bool IsSelectElement(const WebFormControlElement& element) { |
78 return element.formControlType() == ASCIIToUTF16("select-one"); | 78 return element.formControlType() == ASCIIToUTF16("select-one"); |
79 } | 79 } |
80 | 80 |
81 bool IsTextContainerElement(const WebElement& element) { | |
82 return | |
83 element.hasTagName("p") || | |
84 element.hasTagName("b") || | |
85 element.hasTagName("span") || | |
86 element.hasTagName("font"); | |
87 } | |
88 | |
89 bool IsOptionElement(const WebElement& element) { | 81 bool IsOptionElement(const WebElement& element) { |
90 return element.hasTagName("option"); | 82 return element.hasTagName("option"); |
91 } | 83 } |
92 | 84 |
93 bool IsScriptElement(const WebElement& element) { | 85 bool IsScriptElement(const WebElement& element) { |
94 return element.hasTagName("script"); | 86 return element.hasTagName("script"); |
95 } | 87 } |
96 | 88 |
97 bool IsNoScriptElement(const WebElement& element) { | 89 bool IsNoScriptElement(const WebElement& element) { |
98 return element.hasTagName("noscript"); | 90 return element.hasTagName("noscript"); |
99 } | 91 } |
100 | 92 |
101 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { | 93 bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { |
102 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); | 94 return node.isElementNode() && node.toConst<WebElement>().hasTagName(tag); |
103 } | 95 } |
104 | 96 |
105 bool IsAutofillableElement(const WebFormControlElement& element) { | 97 bool IsAutofillableElement(const WebFormControlElement& element) { |
106 const WebInputElement* input_element = toWebInputElement(&element); | 98 const WebInputElement* input_element = toWebInputElement(&element); |
107 return IsTextInput(input_element) || IsSelectElement(element); | 99 return IsTextInput(input_element) || IsSelectElement(element); |
108 } | 100 } |
109 | 101 |
| 102 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed |
| 103 // to a single space. If |force_whitespace| is true, then the resulting string |
| 104 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the |
| 105 // result includes a space only if |prefix| has trailing whitespace or |suffix| |
| 106 // has leading whitespace. |
| 107 // A few examples: |
| 108 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar" |
| 109 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar" |
| 110 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar" |
| 111 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar" |
| 112 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar" |
| 113 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar" |
| 114 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar " |
| 115 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar " |
| 116 const string16 CombineAndCollapseWhitespace(const string16& prefix, |
| 117 const string16& suffix, |
| 118 bool force_whitespace) { |
| 119 string16 prefix_trimmed; |
| 120 TrimPositions prefix_trailing_whitespace = |
| 121 TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed); |
| 122 |
| 123 // Trim leading whitespace from |suffix|, noting whether there was any. |
| 124 string16 suffix_trimmed; |
| 125 TrimPositions suffix_leading_whitespace = |
| 126 TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed); |
| 127 |
| 128 if (prefix_trailing_whitespace || suffix_leading_whitespace || |
| 129 force_whitespace) { |
| 130 return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed; |
| 131 } else { |
| 132 return prefix_trimmed + suffix_trimmed; |
| 133 } |
| 134 } |
| 135 |
110 // This is a helper function for the FindChildText() function (see below). | 136 // This is a helper function for the FindChildText() function (see below). |
111 // Search depth is limited with the |depth| parameter. | 137 // Search depth is limited with the |depth| parameter. |
112 string16 FindChildTextInner(const WebNode& node, int depth) { | 138 string16 FindChildTextInner(const WebNode& node, int depth) { |
113 if (depth <= 0 || node.isNull()) | 139 if (depth <= 0 || node.isNull()) |
114 return string16(); | 140 return string16(); |
115 | 141 |
116 // Skip over comments. | 142 // Skip over comments. |
117 if (node.nodeType() == WebNode::CommentNode) | 143 if (node.nodeType() == WebNode::CommentNode) |
118 return FindChildTextInner(node.nextSibling(), depth - 1); | 144 return FindChildTextInner(node.nextSibling(), depth - 1); |
119 | 145 |
120 if (node.nodeType() != WebNode::ElementNode && | 146 if (node.nodeType() != WebNode::ElementNode && |
121 node.nodeType() != WebNode::TextNode) | 147 node.nodeType() != WebNode::TextNode) |
122 return string16(); | 148 return string16(); |
123 | 149 |
124 // Ignore elements known not to contain inferable labels. | 150 // Ignore elements known not to contain inferable labels. |
125 if (node.isElementNode()) { | 151 if (node.isElementNode()) { |
126 const WebElement element = node.toConst<WebElement>(); | 152 const WebElement element = node.toConst<WebElement>(); |
127 if (IsOptionElement(element) || | 153 if (IsOptionElement(element) || |
128 IsScriptElement(element) || | 154 IsScriptElement(element) || |
129 IsNoScriptElement(element)) { | 155 IsNoScriptElement(element) || |
| 156 (element.isFormControlElement() && |
| 157 IsAutofillableElement(element.toConst<WebFormControlElement>()))) { |
130 return string16(); | 158 return string16(); |
131 } | 159 } |
132 } | 160 } |
133 | 161 |
134 // Extract the text exactly at this node. | 162 // Extract the text exactly at this node. |
135 string16 node_text = node.nodeValue(); | 163 string16 node_text = node.nodeValue(); |
136 TrimPositions node_trailing_whitespace = | |
137 TrimWhitespace(node_text, TRIM_TRAILING, &node_text); | |
138 | 164 |
139 // Recursively compute the children's text. | 165 // Recursively compute the children's text. |
140 // Preserve inter-element whitespace separation. | 166 // Preserve inter-element whitespace separation. |
141 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); | 167 string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); |
142 TrimPositions child_leading_whitespace = | 168 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
143 TrimWhitespace(child_text, TRIM_LEADING, &child_text); | 169 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); |
144 if (node_trailing_whitespace || child_leading_whitespace || | |
145 (node.nodeType() == WebNode::TextNode && node_text.empty())) { | |
146 node_text += ASCIIToUTF16(" "); | |
147 } | |
148 node_text += child_text; | |
149 node_trailing_whitespace = | |
150 TrimWhitespace(node_text, TRIM_TRAILING, &node_text); | |
151 | 170 |
152 // Recursively compute the siblings' text. | 171 // Recursively compute the siblings' text. |
153 // Again, preserve inter-element whitespace separation. | 172 // Again, preserve inter-element whitespace separation. |
154 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); | 173 string16 sibling_text = FindChildTextInner(node.nextSibling(), depth - 1); |
155 TrimPositions sibling_leading_whitespace = | 174 add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); |
156 TrimWhitespace(sibling_text, TRIM_LEADING, &sibling_text); | 175 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); |
157 if (node_trailing_whitespace || sibling_leading_whitespace || | |
158 (node.nodeType() == WebNode::TextNode && node_text.empty())) { | |
159 node_text += ASCIIToUTF16(" "); | |
160 } | |
161 node_text += sibling_text; | |
162 | 176 |
163 return node_text; | 177 return node_text; |
164 } | 178 } |
165 | 179 |
166 // Returns the aggregated values of the descendants of |element| that are | 180 // Returns the aggregated values of the descendants of |node| that are |
167 // non-empty text nodes. This is a faster alternative to |innerText()| for | 181 // non-empty text nodes. This is a faster alternative to |innerText()| for |
168 // performance critical operations. It does a full depth-first search so can be | 182 // performance critical operations. It does a full depth-first search so can be |
169 // used when the structure is not directly known. However, unlike with | 183 // used when the structure is not directly known. However, unlike with |
170 // |innerText()|, the search depth and breadth are limited to a fixed threshold. | 184 // |innerText()|, the search depth and breadth are limited to a fixed threshold. |
171 // Whitespace is trimmed from text accumulated at descendant nodes. | 185 // Whitespace is trimmed from text accumulated at descendant nodes. |
172 string16 FindChildText(const WebElement& element) { | 186 string16 FindChildText(const WebNode& node) { |
173 WebNode child = element.firstChild(); | 187 if (node.isTextNode()) |
| 188 return node.nodeValue(); |
| 189 |
| 190 WebNode child = node.firstChild(); |
174 | 191 |
175 const int kChildSearchDepth = 10; | 192 const int kChildSearchDepth = 10; |
176 string16 element_text = FindChildTextInner(child, kChildSearchDepth); | 193 string16 node_text = FindChildTextInner(child, kChildSearchDepth); |
177 TrimWhitespace(element_text, TRIM_ALL, &element_text); | 194 TrimWhitespace(node_text, TRIM_ALL, &node_text); |
178 return element_text; | 195 return node_text; |
179 } | 196 } |
180 | 197 |
181 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 198 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
182 // a previous sibling of |element|. | 199 // a previous sibling of |element|, |
| 200 // e.g. Some Text <input ...> |
| 201 // or Some <span>Text</span> <input ...> |
| 202 // or <p>Some Text</p><input ...> |
| 203 // or <label>Some Text</label> <input ...> |
| 204 // or Some Text <img><input ...> |
| 205 // or <b>Some Text</b><br/> <input ...>. |
183 string16 InferLabelFromPrevious(const WebFormControlElement& element) { | 206 string16 InferLabelFromPrevious(const WebFormControlElement& element) { |
184 string16 inferred_label; | 207 string16 inferred_label; |
185 WebNode previous = element.previousSibling(); | 208 WebNode previous = element; |
186 if (previous.isNull()) | 209 while (true) { |
187 return string16(); | 210 previous = previous.previousSibling(); |
| 211 if (previous.isNull()) |
| 212 break; |
188 | 213 |
189 // Check for text immediately before the |element|. | 214 // Skip over comments. |
190 if (previous.isTextNode()) { | 215 WebNode::NodeType node_type = previous.nodeType(); |
191 inferred_label = previous.nodeValue(); | 216 if (node_type == WebNode::CommentNode) |
192 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | 217 continue; |
| 218 |
| 219 // Otherwise, only consider normal HTML elements and their contents. |
| 220 if (node_type != WebNode::TextNode && |
| 221 node_type != WebNode::ElementNode) |
| 222 break; |
| 223 |
| 224 // A label might be split across multiple "lightweight" nodes. |
| 225 // Coalesce any text contained in multiple consecutive |
| 226 // (a) plain text nodes or |
| 227 // (b) inline HTML elements that are essentially equivalent to text nodes. |
| 228 if (previous.isTextNode() || |
| 229 HasTagName(previous, "b") || HasTagName(previous, "strong") || |
| 230 HasTagName(previous, "span") || HasTagName(previous, "font")) { |
| 231 string16 value = FindChildText(previous); |
| 232 // A text node's value will be empty if it is for a line break. |
| 233 bool add_space = previous.isTextNode() && value.empty(); |
| 234 inferred_label = |
| 235 CombineAndCollapseWhitespace(value, inferred_label, add_space); |
| 236 continue; |
| 237 } |
| 238 |
| 239 // If we have identified a partial label and have reached a non-lightweight |
| 240 // element, consider the label to be complete. |
| 241 string16 trimmed_label; |
| 242 TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label); |
| 243 if (!trimmed_label.empty()) |
| 244 break; |
| 245 |
| 246 // <img> and <br> tags often appear between the input element and its |
| 247 // label text, so skip over them. |
| 248 if (HasTagName(previous, "img") || HasTagName(previous, "br")) |
| 249 continue; |
| 250 |
| 251 // We only expect <p> and <label> tags to contain the full label text. |
| 252 if (HasTagName(previous, "p") || HasTagName(previous, "label")) |
| 253 inferred_label = FindChildText(previous); |
| 254 |
| 255 break; |
193 } | 256 } |
194 | 257 |
195 // If we didn't find text, check for an immediately preceding text container, | 258 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); |
196 // e.g. <p>Some Text</p><input ...> | |
197 // Note the lack of whitespace between <p> and <input> elements. | |
198 if (inferred_label.empty() && previous.isElementNode()) { | |
199 WebElement previous_element = previous.to<WebElement>(); | |
200 if (IsTextContainerElement(previous_element)) | |
201 inferred_label = FindChildText(previous_element); | |
202 } | |
203 | |
204 // If we didn't find one immediately preceding, check for a text container | |
205 // separated from this node only by whitespace, | |
206 // e.g. <p>Some Text</p> <input ...> | |
207 // Note the whitespace between <p> and <input> elements. | |
208 if (inferred_label.empty() && previous.isTextNode()) { | |
209 WebNode sibling = previous.previousSibling(); | |
210 if (!sibling.isNull() && sibling.isElementNode()) { | |
211 WebElement previous_element = sibling.to<WebElement>(); | |
212 if (IsTextContainerElement(previous_element)) | |
213 inferred_label = FindChildText(previous_element); | |
214 } | |
215 } | |
216 | |
217 // Look for a text node prior to <img> or <br> tags, | |
218 // e.g. Some Text<img/><input ...> or Some Text<br/><input ...> | |
219 while (inferred_label.empty() && !previous.isNull()) { | |
220 if (previous.isTextNode()) { | |
221 inferred_label = previous.nodeValue(); | |
222 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | |
223 } else if (previous.isElementNode()) { | |
224 WebElement previous_element = previous.to<WebElement>(); | |
225 if (IsTextContainerElement(previous_element)) | |
226 inferred_label = FindChildText(previous_element); | |
227 else if (!HasTagName(previous, "img") && !HasTagName(previous, "br")) | |
228 break; | |
229 } else { | |
230 break; | |
231 } | |
232 | |
233 previous = previous.previousSibling(); | |
234 } | |
235 | |
236 // Look for a label node prior to the <input> tag, | |
237 // e.g. <label>Some Text</label><input ...> | |
238 while (inferred_label.empty() && !previous.isNull()) { | |
239 if (previous.isTextNode()) { | |
240 inferred_label = previous.nodeValue(); | |
241 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); | |
242 } else if (HasTagName(previous, "label")) { | |
243 inferred_label = FindChildText(previous.to<WebElement>()); | |
244 } else { | |
245 break; | |
246 } | |
247 | |
248 previous = previous.previousSibling(); | |
249 } | |
250 | |
251 return inferred_label; | 259 return inferred_label; |
252 } | 260 } |
253 | 261 |
254 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 262 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
255 // enclosing list item, | 263 // enclosing list item, |
256 // e.g. <li>Some Text<input ...><input ...><input ...></li> | 264 // e.g. <li>Some Text<input ...><input ...><input ...></li> |
257 string16 InferLabelFromListItem(const WebFormControlElement& element) { | 265 string16 InferLabelFromListItem(const WebFormControlElement& element) { |
258 WebNode parent = element.parentNode(); | 266 WebNode parent = element.parentNode(); |
259 while (!parent.isNull() && parent.isElementNode() && | 267 while (!parent.isNull() && parent.isElementNode() && |
260 !parent.to<WebElement>().hasTagName("li")) { | 268 !parent.to<WebElement>().hasTagName("li")) { |
261 parent = parent.parentNode(); | 269 parent = parent.parentNode(); |
262 } | 270 } |
263 | 271 |
264 if (!parent.isNull() && HasTagName(parent, "li")) | 272 if (!parent.isNull() && HasTagName(parent, "li")) |
265 return FindChildText(parent.to<WebElement>()); | 273 return FindChildText(parent); |
266 | 274 |
267 return string16(); | 275 return string16(); |
268 } | 276 } |
269 | 277 |
270 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 278 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
271 // surrounding table structure, | 279 // surrounding table structure, |
272 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> | 280 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> |
273 // or <tr><th>Some Text</th><td><input ...></td></tr> | 281 // or <tr><th>Some Text</th><td><input ...></td></tr> |
274 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> | 282 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> |
275 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> | 283 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> |
276 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { | 284 string16 InferLabelFromTableColumn(const WebFormControlElement& element) { |
277 WebNode parent = element.parentNode(); | 285 WebNode parent = element.parentNode(); |
278 while (!parent.isNull() && parent.isElementNode() && | 286 while (!parent.isNull() && parent.isElementNode() && |
279 !parent.to<WebElement>().hasTagName("td")) { | 287 !parent.to<WebElement>().hasTagName("td")) { |
280 parent = parent.parentNode(); | 288 parent = parent.parentNode(); |
281 } | 289 } |
282 | 290 |
283 if (parent.isNull()) | 291 if (parent.isNull()) |
284 return string16(); | 292 return string16(); |
285 | 293 |
286 // Check all previous siblings, skipping non-element nodes, until we find a | 294 // Check all previous siblings, skipping non-element nodes, until we find a |
287 // non-empty text block. | 295 // non-empty text block. |
288 string16 inferred_label; | 296 string16 inferred_label; |
289 WebNode previous = parent.previousSibling(); | 297 WebNode previous = parent.previousSibling(); |
290 while (inferred_label.empty() && !previous.isNull()) { | 298 while (inferred_label.empty() && !previous.isNull()) { |
291 if (HasTagName(previous, "td") || HasTagName(previous, "th")) | 299 if (HasTagName(previous, "td") || HasTagName(previous, "th")) |
292 inferred_label = FindChildText(previous.to<WebElement>()); | 300 inferred_label = FindChildText(previous); |
293 | 301 |
294 previous = previous.previousSibling(); | 302 previous = previous.previousSibling(); |
295 } | 303 } |
296 | 304 |
297 return inferred_label; | 305 return inferred_label; |
298 } | 306 } |
299 | 307 |
300 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 308 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
301 // surrounding table structure, | 309 // surrounding table structure, |
302 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> | 310 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> |
303 string16 InferLabelFromTableRow(const WebFormControlElement& element) { | 311 string16 InferLabelFromTableRow(const WebFormControlElement& element) { |
304 WebNode parent = element.parentNode(); | 312 WebNode parent = element.parentNode(); |
305 while (!parent.isNull() && parent.isElementNode() && | 313 while (!parent.isNull() && parent.isElementNode() && |
306 !parent.to<WebElement>().hasTagName("tr")) { | 314 !parent.to<WebElement>().hasTagName("tr")) { |
307 parent = parent.parentNode(); | 315 parent = parent.parentNode(); |
308 } | 316 } |
309 | 317 |
310 if (parent.isNull()) | 318 if (parent.isNull()) |
311 return string16(); | 319 return string16(); |
312 | 320 |
313 // Check all previous siblings, skipping non-element nodes, until we find a | 321 // Check all previous siblings, skipping non-element nodes, until we find a |
314 // non-empty text block. | 322 // non-empty text block. |
315 string16 inferred_label; | 323 string16 inferred_label; |
316 WebNode previous = parent.previousSibling(); | 324 WebNode previous = parent.previousSibling(); |
317 while (inferred_label.empty() && !previous.isNull()) { | 325 while (inferred_label.empty() && !previous.isNull()) { |
318 if (HasTagName(previous, "tr")) | 326 if (HasTagName(previous, "tr")) |
319 inferred_label = FindChildText(previous.to<WebElement>()); | 327 inferred_label = FindChildText(previous); |
320 | 328 |
321 previous = previous.previousSibling(); | 329 previous = previous.previousSibling(); |
322 } | 330 } |
323 | 331 |
324 return inferred_label; | 332 return inferred_label; |
325 } | 333 } |
326 | 334 |
327 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 335 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
328 // a surrounding div table, | 336 // a surrounding div table, |
329 // e.g. <div>Some Text<span><input ...></span></div> | 337 // e.g. <div>Some Text<span><input ...></span></div> |
330 // e.g. <div>Some Text</div><div><input ...></div> | 338 // e.g. <div>Some Text</div><div><input ...></div> |
331 string16 InferLabelFromDivTable(const WebFormControlElement& element) { | 339 string16 InferLabelFromDivTable(const WebFormControlElement& element) { |
332 WebNode node = element.parentNode(); | 340 WebNode node = element.parentNode(); |
333 while (!node.isNull() && node.isElementNode() && | 341 bool looking_for_parent = true; |
334 !node.to<WebElement>().hasTagName("div") && | |
335 // If the element is in a table, its label most likely is too. | |
336 !node.to<WebElement>().hasTagName("table")) { | |
337 node = node.parentNode(); | |
338 } | |
339 | 342 |
340 if (node.isNull() || !HasTagName(node, "div")) | 343 // Search the sibling and parent <div>s until we find a candidate label. |
341 return string16(); | |
342 | |
343 // Search the siblings while we cannot find label. | |
344 string16 inferred_label; | 344 string16 inferred_label; |
345 while (inferred_label.empty() && !node.isNull()) { | 345 while (inferred_label.empty() && !node.isNull()) { |
346 if (HasTagName(node, "div")) | 346 if (HasTagName(node, "div")) { |
347 inferred_label = FindChildText(node.to<WebElement>()); | 347 looking_for_parent = false; |
| 348 inferred_label = FindChildText(node); |
| 349 } else if (looking_for_parent && |
| 350 (HasTagName(node, "table") || HasTagName(node, "fieldset"))) { |
| 351 // If the element is in a table or fieldset, its label most likely is too. |
| 352 break; |
| 353 } |
348 | 354 |
349 node = node.previousSibling(); | 355 if (node.previousSibling().isNull()) { |
| 356 // If there are no more siblings, continue walking up the tree. |
| 357 looking_for_parent = true; |
| 358 } |
| 359 |
| 360 if (looking_for_parent) |
| 361 node = node.parentNode(); |
| 362 else |
| 363 node = node.previousSibling(); |
350 } | 364 } |
351 | 365 |
352 return inferred_label; | 366 return inferred_label; |
353 } | 367 } |
354 | 368 |
355 // Helper for |InferLabelForElement()| that infers a label, if possible, from | 369 // Helper for |InferLabelForElement()| that infers a label, if possible, from |
356 // a surrounding definition list, | 370 // a surrounding definition list, |
357 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> | 371 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> |
358 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> | 372 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> |
359 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { | 373 string16 InferLabelFromDefinitionList(const WebFormControlElement& element) { |
360 WebNode parent = element.parentNode(); | 374 WebNode parent = element.parentNode(); |
361 while (!parent.isNull() && parent.isElementNode() && | 375 while (!parent.isNull() && parent.isElementNode() && |
362 !parent.to<WebElement>().hasTagName("dd")) | 376 !parent.to<WebElement>().hasTagName("dd")) |
363 parent = parent.parentNode(); | 377 parent = parent.parentNode(); |
364 | 378 |
365 if (parent.isNull() || !HasTagName(parent, "dd")) | 379 if (parent.isNull() || !HasTagName(parent, "dd")) |
366 return string16(); | 380 return string16(); |
367 | 381 |
368 // Skip by any intervening text nodes. | 382 // Skip by any intervening text nodes. |
369 WebNode previous = parent.previousSibling(); | 383 WebNode previous = parent.previousSibling(); |
370 while (!previous.isNull() && previous.isTextNode()) | 384 while (!previous.isNull() && previous.isTextNode()) |
371 previous = previous.previousSibling(); | 385 previous = previous.previousSibling(); |
372 | 386 |
373 if (previous.isNull() || !HasTagName(previous, "dt")) | 387 if (previous.isNull() || !HasTagName(previous, "dt")) |
374 return string16(); | 388 return string16(); |
375 | 389 |
376 return FindChildText(previous.to<WebElement>()); | 390 return FindChildText(previous); |
377 } | 391 } |
378 | 392 |
379 // Infers corresponding label for |element| from surrounding context in the DOM, | 393 // Infers corresponding label for |element| from surrounding context in the DOM, |
380 // e.g. the contents of the preceding <p> tag or text element. | 394 // e.g. the contents of the preceding <p> tag or text element. |
381 string16 InferLabelForElement(const WebFormControlElement& element) { | 395 string16 InferLabelForElement(const WebFormControlElement& element) { |
382 string16 inferred_label = InferLabelFromPrevious(element); | 396 string16 inferred_label = InferLabelFromPrevious(element); |
383 if (!inferred_label.empty()) | 397 if (!inferred_label.empty()) |
384 return inferred_label; | 398 return inferred_label; |
385 | 399 |
386 // If we didn't find a label, check for list item case. | 400 // If we didn't find a label, check for list item case. |
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
638 | 652 |
639 // static | 653 // static |
640 string16 FormManager::LabelForElement(const WebFormControlElement& element) { | 654 string16 FormManager::LabelForElement(const WebFormControlElement& element) { |
641 // Don't scrape labels for elements we can't possibly autofill anyway. | 655 // Don't scrape labels for elements we can't possibly autofill anyway. |
642 if (!IsAutofillableElement(element)) | 656 if (!IsAutofillableElement(element)) |
643 return string16(); | 657 return string16(); |
644 | 658 |
645 WebNodeList labels = element.document().getElementsByTagName("label"); | 659 WebNodeList labels = element.document().getElementsByTagName("label"); |
646 for (unsigned i = 0; i < labels.length(); ++i) { | 660 for (unsigned i = 0; i < labels.length(); ++i) { |
647 WebLabelElement label = labels.item(i).to<WebLabelElement>(); | 661 WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
648 DCHECK(label.hasTagName("label")); | 662 WebElement corresponding_control = label.correspondingControl(); |
649 if (label.correspondingControl() == element) | 663 if (corresponding_control == element || |
| 664 (corresponding_control.isNull() && |
| 665 label.getAttribute("for") == element.nameForAutofill())) { |
650 return FindChildText(label); | 666 return FindChildText(label); |
| 667 } |
651 } | 668 } |
652 | 669 |
653 // Infer the label from context if not found in label element. | 670 // Infer the label from context if not found in label element. |
654 return InferLabelForElement(element); | 671 return InferLabelForElement(element); |
655 } | 672 } |
656 | 673 |
657 // static | 674 // static |
658 bool FormManager::WebFormElementToFormData(const WebFormElement& element, | 675 bool FormManager::WebFormElementToFormData(const WebFormElement& element, |
659 RequirementsMask requirements, | 676 RequirementsMask requirements, |
660 ExtractMask extract_mask, | 677 ExtractMask extract_mask, |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
725 // Loop through the label elements inside the form element. For each label | 742 // Loop through the label elements inside the form element. For each label |
726 // element, get the corresponding form control element, use the form control | 743 // element, get the corresponding form control element, use the form control |
727 // element's name as a key into the <name, FormField> map to find the | 744 // element's name as a key into the <name, FormField> map to find the |
728 // previously created FormField and append the label element's text (as | 745 // previously created FormField and append the label element's text (as |
729 // returned by FindChildText()) to the FormField's label. | 746 // returned by FindChildText()) to the FormField's label. |
730 WebNodeList labels = element.getElementsByTagName("label"); | 747 WebNodeList labels = element.getElementsByTagName("label"); |
731 for (unsigned i = 0; i < labels.length(); ++i) { | 748 for (unsigned i = 0; i < labels.length(); ++i) { |
732 WebLabelElement label = labels.item(i).to<WebLabelElement>(); | 749 WebLabelElement label = labels.item(i).to<WebLabelElement>(); |
733 WebFormControlElement field_element = | 750 WebFormControlElement field_element = |
734 label.correspondingControl().to<WebFormControlElement>(); | 751 label.correspondingControl().to<WebFormControlElement>(); |
735 if (field_element.isNull() || | 752 |
| 753 string16 element_name; |
| 754 if (field_element.isNull()) { |
| 755 // Sometimes site authors will incorrectly specify the corresponding |
| 756 // field element's name rather than its id, so we compensate here. |
| 757 element_name = label.getAttribute("for"); |
| 758 } else if ( |
736 !field_element.isFormControlElement() || | 759 !field_element.isFormControlElement() || |
737 field_element.formControlType() == WebString::fromUTF8("hidden")) | 760 field_element.formControlType() == WebString::fromUTF8("hidden")) { |
738 continue; | 761 continue; |
| 762 } else { |
| 763 element_name = field_element.nameForAutofill(); |
| 764 } |
739 | 765 |
740 std::map<string16, FormField*>::iterator iter = | 766 std::map<string16, FormField*>::iterator iter = name_map.find(element_name); |
741 name_map.find(field_element.nameForAutofill()); | |
742 // Concatenate labels because some sites might have multiple label | 767 // Concatenate labels because some sites might have multiple label |
743 // candidates. | 768 // candidates. |
744 if (iter != name_map.end()) | 769 if (iter != name_map.end()) |
745 iter->second->label += FindChildText(label); | 770 iter->second->label += FindChildText(label); |
746 } | 771 } |
747 | 772 |
748 // Loop through the form control elements, extracting the label text from | 773 // Loop through the form control elements, extracting the label text from |
749 // the DOM. We use the |fields_extracted| vector to make sure we assign the | 774 // the DOM. We use the |fields_extracted| vector to make sure we assign the |
750 // extracted label to the correct field, as it's possible |form_fields| will | 775 // extracted label to the correct field, as it's possible |form_fields| will |
751 // not contain all of the elements in |control_elements|. | 776 // not contain all of the elements in |control_elements|. |
(...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1069 if (element_name == form.name && action == form.action) { | 1094 if (element_name == form.name && action == form.action) { |
1070 *form_element = *form_iter; | 1095 *form_element = *form_iter; |
1071 return true; | 1096 return true; |
1072 } | 1097 } |
1073 } | 1098 } |
1074 | 1099 |
1075 return false; | 1100 return false; |
1076 } | 1101 } |
1077 | 1102 |
1078 } // namespace autofill | 1103 } // namespace autofill |
OLD | NEW |