OLD | NEW |
| (Empty) |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/importer/bookmark_html_reader.h" | |
6 | |
7 #include "base/callback.h" | |
8 #include "base/file_util.h" | |
9 #include "base/i18n/icu_string_conversions.h" | |
10 #include "base/strings/string_number_conversions.h" | |
11 #include "base/strings/string_split.h" | |
12 #include "base/strings/string_util.h" | |
13 #include "base/time/time.h" | |
14 #include "chrome/browser/favicon/favicon_util.h" | |
15 #include "chrome/common/importer/imported_bookmark_entry.h" | |
16 #include "chrome/common/importer/imported_favicon_usage.h" | |
17 #include "content/public/common/url_constants.h" | |
18 #include "net/base/data_url.h" | |
19 #include "net/base/escape.h" | |
20 #include "url/gurl.h" | |
21 | |
22 namespace { | |
23 | |
// Looks up |attribute| in |attribute_list| (text of the form
// NAME="value" NAME2="value2" ...) and copies its value, without the
// surrounding quotes, into |value|.
//
// Returns true on success. Returns false, leaving |value| untouched, when
// ATTRIBUTE=" does not occur in |attribute_list| or when the value has no
// closing quote. A quote that directly follows a backslash is treated as
// escaped and does not terminate the value; the backslash itself is kept in
// |value|.
bool GetAttribute(const std::string& attribute_list,
                  const std::string& attribute,
                  std::string* value) {
  const char kQuote[] = "\"";

  size_t begin = attribute_list.find(attribute + "=" + kQuote);
  if (begin == std::string::npos)
    return false;  // Can't find the attribute.

  // Skip past ATTRIBUTE=" so |begin| is the first character of the value.
  begin += attribute.size() + 2;

  // Scan from |begin| itself rather than |begin| + 1: for an empty value such
  // as HREF="" the closing quote sits exactly at |begin|, and skipping it
  // would run on into the following attribute. |begin| is at least 2 here and
  // the character before it is the opening quote, so the escape check below
  // never reads out of range and never mistakes the first character for an
  // escaped one.
  size_t end = begin;
  while (end < attribute_list.size()) {
    if (attribute_list[end] == '"' && attribute_list[end - 1] != '\\')
      break;
    ++end;
  }

  if (end == attribute_list.size())
    return false;  // The value is not quoted.

  *value = attribute_list.substr(begin, end - begin);
  return true;
}
52 | |
53 // Given the URL of a page and a favicon data URL, adds an appropriate record | |
54 // to the given favicon usage vector. | |
55 void DataURLToFaviconUsage( | |
56 const GURL& link_url, | |
57 const GURL& favicon_data, | |
58 std::vector<ImportedFaviconUsage>* favicons) { | |
59 if (!link_url.is_valid() || !favicon_data.is_valid() || | |
60 !favicon_data.SchemeIs(chrome::kDataScheme)) | |
61 return; | |
62 | |
63 // Parse the data URL. | |
64 std::string mime_type, char_set, data; | |
65 if (!net::DataURL::Parse(favicon_data, &mime_type, &char_set, &data) || | |
66 data.empty()) | |
67 return; | |
68 | |
69 ImportedFaviconUsage usage; | |
70 if (!FaviconUtil::ReencodeFavicon( | |
71 reinterpret_cast<const unsigned char*>(&data[0]), | |
72 data.size(), &usage.png_data)) | |
73 return; // Unable to decode. | |
74 | |
75 // We need to make up a URL for the favicon. We use a version of the page's | |
76 // URL so that we can be sure it will not collide. | |
77 usage.favicon_url = GURL(std::string("made-up-favicon:") + link_url.spec()); | |
78 | |
79 // We only have one URL per favicon for Firefox 2 bookmarks. | |
80 usage.urls.insert(link_url); | |
81 | |
82 favicons->push_back(usage); | |
83 } | |
84 | |
85 } // namespace | |
86 | |
87 namespace bookmark_html_reader { | |
88 | |
89 void ImportBookmarksFile( | |
90 const base::Callback<bool(void)>& cancellation_callback, | |
91 const base::Callback<bool(const GURL&)>& valid_url_callback, | |
92 const base::FilePath& file_path, | |
93 std::vector<ImportedBookmarkEntry>* bookmarks, | |
94 std::vector<ImportedFaviconUsage>* favicons) { | |
95 std::string content; | |
96 file_util::ReadFileToString(file_path, &content); | |
97 std::vector<std::string> lines; | |
98 base::SplitString(content, '\n', &lines); | |
99 | |
100 base::string16 last_folder; | |
101 bool last_folder_on_toolbar = false; | |
102 bool last_folder_is_empty = true; | |
103 bool has_subfolder = false; | |
104 base::Time last_folder_add_date; | |
105 std::vector<base::string16> path; | |
106 size_t toolbar_folder_index = 0; | |
107 std::string charset; | |
108 for (size_t i = 0; | |
109 i < lines.size() && | |
110 (cancellation_callback.is_null() || !cancellation_callback.Run()); | |
111 ++i) { | |
112 std::string line; | |
113 TrimString(lines[i], " ", &line); | |
114 | |
115 // Get the encoding of the bookmark file. | |
116 if (internal::ParseCharsetFromLine(line, &charset)) | |
117 continue; | |
118 | |
119 // Get the folder name. | |
120 if (internal::ParseFolderNameFromLine(line, | |
121 charset, | |
122 &last_folder, | |
123 &last_folder_on_toolbar, | |
124 &last_folder_add_date)) { | |
125 continue; | |
126 } | |
127 | |
128 // Get the bookmark entry. | |
129 base::string16 title; | |
130 base::string16 shortcut; | |
131 GURL url, favicon; | |
132 base::Time add_date; | |
133 base::string16 post_data; | |
134 bool is_bookmark; | |
135 // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based | |
136 // keywords yet. | |
137 is_bookmark = | |
138 internal::ParseBookmarkFromLine(line, charset, &title, | |
139 &url, &favicon, &shortcut, | |
140 &add_date, &post_data) || | |
141 internal::ParseMinimumBookmarkFromLine(line, charset, &title, &url); | |
142 | |
143 if (is_bookmark) | |
144 last_folder_is_empty = false; | |
145 | |
146 if (is_bookmark && | |
147 post_data.empty() && | |
148 (valid_url_callback.is_null() || valid_url_callback.Run(url))) { | |
149 if (toolbar_folder_index > path.size() && !path.empty()) { | |
150 NOTREACHED(); // error in parsing. | |
151 break; | |
152 } | |
153 | |
154 ImportedBookmarkEntry entry; | |
155 entry.creation_time = add_date; | |
156 entry.url = url; | |
157 entry.title = title; | |
158 | |
159 if (toolbar_folder_index) { | |
160 // The toolbar folder should be at the top level. | |
161 entry.in_toolbar = true; | |
162 entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end()); | |
163 } else { | |
164 // Add this bookmark to the list of |bookmarks|. | |
165 if (!has_subfolder && !last_folder.empty()) { | |
166 path.push_back(last_folder); | |
167 last_folder.clear(); | |
168 } | |
169 entry.path.assign(path.begin(), path.end()); | |
170 } | |
171 bookmarks->push_back(entry); | |
172 | |
173 // Save the favicon. DataURLToFaviconUsage will handle the case where | |
174 // there is no favicon. | |
175 if (favicons) | |
176 DataURLToFaviconUsage(url, favicon, favicons); | |
177 | |
178 continue; | |
179 } | |
180 | |
181 // Bookmarks in sub-folder are encapsulated with <DL> tag. | |
182 if (StartsWithASCII(line, "<DL>", false)) { | |
183 has_subfolder = true; | |
184 if (!last_folder.empty()) { | |
185 path.push_back(last_folder); | |
186 last_folder.clear(); | |
187 } | |
188 if (last_folder_on_toolbar && !toolbar_folder_index) | |
189 toolbar_folder_index = path.size(); | |
190 | |
191 // Mark next folder empty as initial state. | |
192 last_folder_is_empty = true; | |
193 } else if (StartsWithASCII(line, "</DL>", false)) { | |
194 if (path.empty()) | |
195 break; // Mismatch <DL>. | |
196 | |
197 base::string16 folder_title = path.back(); | |
198 path.pop_back(); | |
199 | |
200 if (last_folder_is_empty) { | |
201 // Empty folder should be added explicitly. | |
202 ImportedBookmarkEntry entry; | |
203 entry.is_folder = true; | |
204 entry.creation_time = last_folder_add_date; | |
205 entry.title = folder_title; | |
206 if (toolbar_folder_index) { | |
207 // The toolbar folder should be at the top level. | |
208 // Make sure we don't add the toolbar folder itself if it is empty. | |
209 if (toolbar_folder_index <= path.size()) { | |
210 entry.in_toolbar = true; | |
211 entry.path.assign(path.begin() + toolbar_folder_index - 1, | |
212 path.end()); | |
213 bookmarks->push_back(entry); | |
214 } | |
215 } else { | |
216 // Add this folder to the list of |bookmarks|. | |
217 entry.path.assign(path.begin(), path.end()); | |
218 bookmarks->push_back(entry); | |
219 } | |
220 | |
221 // Parent folder include current one, so it's not empty. | |
222 last_folder_is_empty = false; | |
223 } | |
224 | |
225 if (toolbar_folder_index > path.size()) | |
226 toolbar_folder_index = 0; | |
227 } | |
228 } | |
229 } | |
230 | |
231 namespace internal { | |
232 | |
233 bool ParseCharsetFromLine(const std::string& line, std::string* charset) { | |
234 const char kCharset[] = "charset="; | |
235 if (StartsWithASCII(line, "<META", false) && | |
236 (line.find("CONTENT=\"") != std::string::npos || | |
237 line.find("content=\"") != std::string::npos)) { | |
238 size_t begin = line.find(kCharset); | |
239 if (begin == std::string::npos) | |
240 return false; | |
241 begin += std::string(kCharset).size(); | |
242 size_t end = line.find_first_of('\"', begin); | |
243 *charset = line.substr(begin, end - begin); | |
244 return true; | |
245 } | |
246 return false; | |
247 } | |
248 | |
249 bool ParseFolderNameFromLine(const std::string& line, | |
250 const std::string& charset, | |
251 base::string16* folder_name, | |
252 bool* is_toolbar_folder, | |
253 base::Time* add_date) { | |
254 const char kFolderOpen[] = "<DT><H3"; | |
255 const char kFolderClose[] = "</H3>"; | |
256 const char kToolbarFolderAttribute[] = "PERSONAL_TOOLBAR_FOLDER"; | |
257 const char kAddDateAttribute[] = "ADD_DATE"; | |
258 | |
259 if (!StartsWithASCII(line, kFolderOpen, true)) | |
260 return false; | |
261 | |
262 size_t end = line.find(kFolderClose); | |
263 size_t tag_end = line.rfind('>', end) + 1; | |
264 // If no end tag or start tag is broken, we skip to find the folder name. | |
265 if (end == std::string::npos || tag_end < arraysize(kFolderOpen)) | |
266 return false; | |
267 | |
268 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), | |
269 base::OnStringConversionError::SKIP, folder_name); | |
270 *folder_name = net::UnescapeForHTML(*folder_name); | |
271 | |
272 std::string attribute_list = line.substr(arraysize(kFolderOpen), | |
273 tag_end - arraysize(kFolderOpen) - 1); | |
274 std::string value; | |
275 | |
276 // Add date | |
277 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { | |
278 int64 time; | |
279 base::StringToInt64(value, &time); | |
280 // Upper bound it at 32 bits. | |
281 if (0 < time && time < (1LL << 32)) | |
282 *add_date = base::Time::FromTimeT(time); | |
283 } | |
284 | |
285 if (GetAttribute(attribute_list, kToolbarFolderAttribute, &value) && | |
286 LowerCaseEqualsASCII(value, "true")) | |
287 *is_toolbar_folder = true; | |
288 else | |
289 *is_toolbar_folder = false; | |
290 | |
291 return true; | |
292 } | |
293 | |
294 bool ParseBookmarkFromLine(const std::string& line, | |
295 const std::string& charset, | |
296 base::string16* title, | |
297 GURL* url, | |
298 GURL* favicon, | |
299 base::string16* shortcut, | |
300 base::Time* add_date, | |
301 base::string16* post_data) { | |
302 const char kItemOpen[] = "<DT><A"; | |
303 const char kItemClose[] = "</A>"; | |
304 const char kFeedURLAttribute[] = "FEEDURL"; | |
305 const char kHrefAttribute[] = "HREF"; | |
306 const char kIconAttribute[] = "ICON"; | |
307 const char kShortcutURLAttribute[] = "SHORTCUTURL"; | |
308 const char kAddDateAttribute[] = "ADD_DATE"; | |
309 const char kPostDataAttribute[] = "POST_DATA"; | |
310 | |
311 title->clear(); | |
312 *url = GURL(); | |
313 *favicon = GURL(); | |
314 shortcut->clear(); | |
315 post_data->clear(); | |
316 *add_date = base::Time(); | |
317 | |
318 if (!StartsWithASCII(line, kItemOpen, true)) | |
319 return false; | |
320 | |
321 size_t end = line.find(kItemClose); | |
322 size_t tag_end = line.rfind('>', end) + 1; | |
323 if (end == std::string::npos || tag_end < arraysize(kItemOpen)) | |
324 return false; // No end tag or start tag is broken. | |
325 | |
326 std::string attribute_list = line.substr(arraysize(kItemOpen), | |
327 tag_end - arraysize(kItemOpen) - 1); | |
328 | |
329 // We don't import Live Bookmark folders, which is Firefox's RSS reading | |
330 // feature, since the user never necessarily bookmarked them and we don't | |
331 // have this feature to update their contents. | |
332 std::string value; | |
333 if (GetAttribute(attribute_list, kFeedURLAttribute, &value)) | |
334 return false; | |
335 | |
336 // Title | |
337 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), | |
338 base::OnStringConversionError::SKIP, title); | |
339 *title = net::UnescapeForHTML(*title); | |
340 | |
341 // URL | |
342 if (GetAttribute(attribute_list, kHrefAttribute, &value)) { | |
343 base::string16 url16; | |
344 base::CodepageToUTF16(value, charset.c_str(), | |
345 base::OnStringConversionError::SKIP, &url16); | |
346 url16 = net::UnescapeForHTML(url16); | |
347 | |
348 *url = GURL(url16); | |
349 } | |
350 | |
351 // Favicon | |
352 if (GetAttribute(attribute_list, kIconAttribute, &value)) | |
353 *favicon = GURL(value); | |
354 | |
355 // Keyword | |
356 if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) { | |
357 base::CodepageToUTF16(value, charset.c_str(), | |
358 base::OnStringConversionError::SKIP, shortcut); | |
359 *shortcut = net::UnescapeForHTML(*shortcut); | |
360 } | |
361 | |
362 // Add date | |
363 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { | |
364 int64 time; | |
365 base::StringToInt64(value, &time); | |
366 // Upper bound it at 32 bits. | |
367 if (0 < time && time < (1LL << 32)) | |
368 *add_date = base::Time::FromTimeT(time); | |
369 } | |
370 | |
371 // Post data. | |
372 if (GetAttribute(attribute_list, kPostDataAttribute, &value)) { | |
373 base::CodepageToUTF16(value, charset.c_str(), | |
374 base::OnStringConversionError::SKIP, post_data); | |
375 *post_data = net::UnescapeForHTML(*post_data); | |
376 } | |
377 | |
378 return true; | |
379 } | |
380 | |
381 bool ParseMinimumBookmarkFromLine(const std::string& line, | |
382 const std::string& charset, | |
383 base::string16* title, | |
384 GURL* url) { | |
385 const char kItemOpen[] = "<DT><A"; | |
386 const char kItemClose[] = "</"; | |
387 const char kHrefAttributeUpper[] = "HREF"; | |
388 const char kHrefAttributeLower[] = "href"; | |
389 | |
390 title->clear(); | |
391 *url = GURL(); | |
392 | |
393 // Case-insensitive check of open tag. | |
394 if (!StartsWithASCII(line, kItemOpen, false)) | |
395 return false; | |
396 | |
397 // Find any close tag. | |
398 size_t end = line.find(kItemClose); | |
399 size_t tag_end = line.rfind('>', end) + 1; | |
400 if (end == std::string::npos || tag_end < arraysize(kItemOpen)) | |
401 return false; // No end tag or start tag is broken. | |
402 | |
403 std::string attribute_list = line.substr(arraysize(kItemOpen), | |
404 tag_end - arraysize(kItemOpen) - 1); | |
405 | |
406 // Title | |
407 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), | |
408 base::OnStringConversionError::SKIP, title); | |
409 *title = net::UnescapeForHTML(*title); | |
410 | |
411 // URL | |
412 std::string value; | |
413 if (GetAttribute(attribute_list, kHrefAttributeUpper, &value) || | |
414 GetAttribute(attribute_list, kHrefAttributeLower, &value)) { | |
415 if (charset.length() != 0) { | |
416 base::string16 url16; | |
417 base::CodepageToUTF16(value, charset.c_str(), | |
418 base::OnStringConversionError::SKIP, &url16); | |
419 url16 = net::UnescapeForHTML(url16); | |
420 | |
421 *url = GURL(url16); | |
422 } else { | |
423 *url = GURL(value); | |
424 } | |
425 } | |
426 | |
427 return true; | |
428 } | |
429 | |
430 } // namespace internal | |
431 | |
432 } // namespace bookmark_html_reader | |
OLD | NEW |