Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(89)

Side by Side Diff: chrome/browser/importer/bookmark_html_reader.cc

Issue 18501013: Move most importer code to chrome/utility/importer (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: CanImport Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/importer/bookmark_html_reader.h"
6
7 #include "base/callback.h"
8 #include "base/file_util.h"
9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_split.h"
12 #include "base/strings/string_util.h"
13 #include "base/time/time.h"
14 #include "chrome/browser/favicon/favicon_util.h"
15 #include "chrome/common/importer/imported_bookmark_entry.h"
16 #include "chrome/common/importer/imported_favicon_usage.h"
17 #include "content/public/common/url_constants.h"
18 #include "net/base/data_url.h"
19 #include "net/base/escape.h"
20 #include "url/gurl.h"
21
22 namespace {
23
// Fetches the given |attribute| value from the |attribute_list|. Returns true
// if successful, and |value| will contain the value.
//
// The attribute has to appear as |attribute|="...". A double quote that is
// directly preceded by a backslash is treated as part of the value and does
// not terminate it; the backslash itself is kept in |value|. An empty value
// (|attribute|="") is a successful match that yields an empty string.
// Returns false, leaving |value| untouched, when the attribute is not present
// or when its value is never closed by a quote.
bool GetAttribute(const std::string& attribute_list,
                  const std::string& attribute,
                  std::string* value) {
  const char kQuote[] = "\"";

  size_t begin = attribute_list.find(attribute + "=" + kQuote);
  if (begin == std::string::npos)
    return false;  // Can't find the attribute.

  // Step over the attribute name, the '=' and the opening quote.
  begin += attribute.size() + 2;

  // Scan from the first character of the value so that an empty value is
  // terminated by the quote sitting right at |begin|. |end - 1| is at worst
  // the opening quote, so the escape check never reads before the string.
  size_t end = begin;
  while (end < attribute_list.size()) {
    if (attribute_list[end] == '"' && attribute_list[end - 1] != '\\')
      break;
    ++end;
  }

  if (end == attribute_list.size())
    return false;  // The value is not terminated by a closing quote.

  *value = attribute_list.substr(begin, end - begin);
  return true;
}
52
53 // Given the URL of a page and a favicon data URL, adds an appropriate record
54 // to the given favicon usage vector.
55 void DataURLToFaviconUsage(
56 const GURL& link_url,
57 const GURL& favicon_data,
58 std::vector<ImportedFaviconUsage>* favicons) {
59 if (!link_url.is_valid() || !favicon_data.is_valid() ||
60 !favicon_data.SchemeIs(chrome::kDataScheme))
61 return;
62
63 // Parse the data URL.
64 std::string mime_type, char_set, data;
65 if (!net::DataURL::Parse(favicon_data, &mime_type, &char_set, &data) ||
66 data.empty())
67 return;
68
69 ImportedFaviconUsage usage;
70 if (!FaviconUtil::ReencodeFavicon(
71 reinterpret_cast<const unsigned char*>(&data[0]),
72 data.size(), &usage.png_data))
73 return; // Unable to decode.
74
75 // We need to make up a URL for the favicon. We use a version of the page's
76 // URL so that we can be sure it will not collide.
77 usage.favicon_url = GURL(std::string("made-up-favicon:") + link_url.spec());
78
79 // We only have one URL per favicon for Firefox 2 bookmarks.
80 usage.urls.insert(link_url);
81
82 favicons->push_back(usage);
83 }
84
85 } // namespace
86
87 namespace bookmark_html_reader {
88
89 void ImportBookmarksFile(
90 const base::Callback<bool(void)>& cancellation_callback,
91 const base::Callback<bool(const GURL&)>& valid_url_callback,
92 const base::FilePath& file_path,
93 std::vector<ImportedBookmarkEntry>* bookmarks,
94 std::vector<ImportedFaviconUsage>* favicons) {
95 std::string content;
96 file_util::ReadFileToString(file_path, &content);
97 std::vector<std::string> lines;
98 base::SplitString(content, '\n', &lines);
99
100 base::string16 last_folder;
101 bool last_folder_on_toolbar = false;
102 bool last_folder_is_empty = true;
103 bool has_subfolder = false;
104 base::Time last_folder_add_date;
105 std::vector<base::string16> path;
106 size_t toolbar_folder_index = 0;
107 std::string charset;
108 for (size_t i = 0;
109 i < lines.size() &&
110 (cancellation_callback.is_null() || !cancellation_callback.Run());
111 ++i) {
112 std::string line;
113 TrimString(lines[i], " ", &line);
114
115 // Get the encoding of the bookmark file.
116 if (internal::ParseCharsetFromLine(line, &charset))
117 continue;
118
119 // Get the folder name.
120 if (internal::ParseFolderNameFromLine(line,
121 charset,
122 &last_folder,
123 &last_folder_on_toolbar,
124 &last_folder_add_date)) {
125 continue;
126 }
127
128 // Get the bookmark entry.
129 base::string16 title;
130 base::string16 shortcut;
131 GURL url, favicon;
132 base::Time add_date;
133 base::string16 post_data;
134 bool is_bookmark;
135 // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based
136 // keywords yet.
137 is_bookmark =
138 internal::ParseBookmarkFromLine(line, charset, &title,
139 &url, &favicon, &shortcut,
140 &add_date, &post_data) ||
141 internal::ParseMinimumBookmarkFromLine(line, charset, &title, &url);
142
143 if (is_bookmark)
144 last_folder_is_empty = false;
145
146 if (is_bookmark &&
147 post_data.empty() &&
148 (valid_url_callback.is_null() || valid_url_callback.Run(url))) {
149 if (toolbar_folder_index > path.size() && !path.empty()) {
150 NOTREACHED(); // error in parsing.
151 break;
152 }
153
154 ImportedBookmarkEntry entry;
155 entry.creation_time = add_date;
156 entry.url = url;
157 entry.title = title;
158
159 if (toolbar_folder_index) {
160 // The toolbar folder should be at the top level.
161 entry.in_toolbar = true;
162 entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end());
163 } else {
164 // Add this bookmark to the list of |bookmarks|.
165 if (!has_subfolder && !last_folder.empty()) {
166 path.push_back(last_folder);
167 last_folder.clear();
168 }
169 entry.path.assign(path.begin(), path.end());
170 }
171 bookmarks->push_back(entry);
172
173 // Save the favicon. DataURLToFaviconUsage will handle the case where
174 // there is no favicon.
175 if (favicons)
176 DataURLToFaviconUsage(url, favicon, favicons);
177
178 continue;
179 }
180
181 // Bookmarks in sub-folder are encapsulated with <DL> tag.
182 if (StartsWithASCII(line, "<DL>", false)) {
183 has_subfolder = true;
184 if (!last_folder.empty()) {
185 path.push_back(last_folder);
186 last_folder.clear();
187 }
188 if (last_folder_on_toolbar && !toolbar_folder_index)
189 toolbar_folder_index = path.size();
190
191 // Mark next folder empty as initial state.
192 last_folder_is_empty = true;
193 } else if (StartsWithASCII(line, "</DL>", false)) {
194 if (path.empty())
195 break; // Mismatch <DL>.
196
197 base::string16 folder_title = path.back();
198 path.pop_back();
199
200 if (last_folder_is_empty) {
201 // Empty folder should be added explicitly.
202 ImportedBookmarkEntry entry;
203 entry.is_folder = true;
204 entry.creation_time = last_folder_add_date;
205 entry.title = folder_title;
206 if (toolbar_folder_index) {
207 // The toolbar folder should be at the top level.
208 // Make sure we don't add the toolbar folder itself if it is empty.
209 if (toolbar_folder_index <= path.size()) {
210 entry.in_toolbar = true;
211 entry.path.assign(path.begin() + toolbar_folder_index - 1,
212 path.end());
213 bookmarks->push_back(entry);
214 }
215 } else {
216 // Add this folder to the list of |bookmarks|.
217 entry.path.assign(path.begin(), path.end());
218 bookmarks->push_back(entry);
219 }
220
221 // Parent folder include current one, so it's not empty.
222 last_folder_is_empty = false;
223 }
224
225 if (toolbar_folder_index > path.size())
226 toolbar_folder_index = 0;
227 }
228 }
229 }
230
231 namespace internal {
232
233 bool ParseCharsetFromLine(const std::string& line, std::string* charset) {
234 const char kCharset[] = "charset=";
235 if (StartsWithASCII(line, "<META", false) &&
236 (line.find("CONTENT=\"") != std::string::npos ||
237 line.find("content=\"") != std::string::npos)) {
238 size_t begin = line.find(kCharset);
239 if (begin == std::string::npos)
240 return false;
241 begin += std::string(kCharset).size();
242 size_t end = line.find_first_of('\"', begin);
243 *charset = line.substr(begin, end - begin);
244 return true;
245 }
246 return false;
247 }
248
249 bool ParseFolderNameFromLine(const std::string& line,
250 const std::string& charset,
251 base::string16* folder_name,
252 bool* is_toolbar_folder,
253 base::Time* add_date) {
254 const char kFolderOpen[] = "<DT><H3";
255 const char kFolderClose[] = "</H3>";
256 const char kToolbarFolderAttribute[] = "PERSONAL_TOOLBAR_FOLDER";
257 const char kAddDateAttribute[] = "ADD_DATE";
258
259 if (!StartsWithASCII(line, kFolderOpen, true))
260 return false;
261
262 size_t end = line.find(kFolderClose);
263 size_t tag_end = line.rfind('>', end) + 1;
264 // If no end tag or start tag is broken, we skip to find the folder name.
265 if (end == std::string::npos || tag_end < arraysize(kFolderOpen))
266 return false;
267
268 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
269 base::OnStringConversionError::SKIP, folder_name);
270 *folder_name = net::UnescapeForHTML(*folder_name);
271
272 std::string attribute_list = line.substr(arraysize(kFolderOpen),
273 tag_end - arraysize(kFolderOpen) - 1);
274 std::string value;
275
276 // Add date
277 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) {
278 int64 time;
279 base::StringToInt64(value, &time);
280 // Upper bound it at 32 bits.
281 if (0 < time && time < (1LL << 32))
282 *add_date = base::Time::FromTimeT(time);
283 }
284
285 if (GetAttribute(attribute_list, kToolbarFolderAttribute, &value) &&
286 LowerCaseEqualsASCII(value, "true"))
287 *is_toolbar_folder = true;
288 else
289 *is_toolbar_folder = false;
290
291 return true;
292 }
293
294 bool ParseBookmarkFromLine(const std::string& line,
295 const std::string& charset,
296 base::string16* title,
297 GURL* url,
298 GURL* favicon,
299 base::string16* shortcut,
300 base::Time* add_date,
301 base::string16* post_data) {
302 const char kItemOpen[] = "<DT><A";
303 const char kItemClose[] = "</A>";
304 const char kFeedURLAttribute[] = "FEEDURL";
305 const char kHrefAttribute[] = "HREF";
306 const char kIconAttribute[] = "ICON";
307 const char kShortcutURLAttribute[] = "SHORTCUTURL";
308 const char kAddDateAttribute[] = "ADD_DATE";
309 const char kPostDataAttribute[] = "POST_DATA";
310
311 title->clear();
312 *url = GURL();
313 *favicon = GURL();
314 shortcut->clear();
315 post_data->clear();
316 *add_date = base::Time();
317
318 if (!StartsWithASCII(line, kItemOpen, true))
319 return false;
320
321 size_t end = line.find(kItemClose);
322 size_t tag_end = line.rfind('>', end) + 1;
323 if (end == std::string::npos || tag_end < arraysize(kItemOpen))
324 return false; // No end tag or start tag is broken.
325
326 std::string attribute_list = line.substr(arraysize(kItemOpen),
327 tag_end - arraysize(kItemOpen) - 1);
328
329 // We don't import Live Bookmark folders, which is Firefox's RSS reading
330 // feature, since the user never necessarily bookmarked them and we don't
331 // have this feature to update their contents.
332 std::string value;
333 if (GetAttribute(attribute_list, kFeedURLAttribute, &value))
334 return false;
335
336 // Title
337 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
338 base::OnStringConversionError::SKIP, title);
339 *title = net::UnescapeForHTML(*title);
340
341 // URL
342 if (GetAttribute(attribute_list, kHrefAttribute, &value)) {
343 base::string16 url16;
344 base::CodepageToUTF16(value, charset.c_str(),
345 base::OnStringConversionError::SKIP, &url16);
346 url16 = net::UnescapeForHTML(url16);
347
348 *url = GURL(url16);
349 }
350
351 // Favicon
352 if (GetAttribute(attribute_list, kIconAttribute, &value))
353 *favicon = GURL(value);
354
355 // Keyword
356 if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) {
357 base::CodepageToUTF16(value, charset.c_str(),
358 base::OnStringConversionError::SKIP, shortcut);
359 *shortcut = net::UnescapeForHTML(*shortcut);
360 }
361
362 // Add date
363 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) {
364 int64 time;
365 base::StringToInt64(value, &time);
366 // Upper bound it at 32 bits.
367 if (0 < time && time < (1LL << 32))
368 *add_date = base::Time::FromTimeT(time);
369 }
370
371 // Post data.
372 if (GetAttribute(attribute_list, kPostDataAttribute, &value)) {
373 base::CodepageToUTF16(value, charset.c_str(),
374 base::OnStringConversionError::SKIP, post_data);
375 *post_data = net::UnescapeForHTML(*post_data);
376 }
377
378 return true;
379 }
380
381 bool ParseMinimumBookmarkFromLine(const std::string& line,
382 const std::string& charset,
383 base::string16* title,
384 GURL* url) {
385 const char kItemOpen[] = "<DT><A";
386 const char kItemClose[] = "</";
387 const char kHrefAttributeUpper[] = "HREF";
388 const char kHrefAttributeLower[] = "href";
389
390 title->clear();
391 *url = GURL();
392
393 // Case-insensitive check of open tag.
394 if (!StartsWithASCII(line, kItemOpen, false))
395 return false;
396
397 // Find any close tag.
398 size_t end = line.find(kItemClose);
399 size_t tag_end = line.rfind('>', end) + 1;
400 if (end == std::string::npos || tag_end < arraysize(kItemOpen))
401 return false; // No end tag or start tag is broken.
402
403 std::string attribute_list = line.substr(arraysize(kItemOpen),
404 tag_end - arraysize(kItemOpen) - 1);
405
406 // Title
407 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
408 base::OnStringConversionError::SKIP, title);
409 *title = net::UnescapeForHTML(*title);
410
411 // URL
412 std::string value;
413 if (GetAttribute(attribute_list, kHrefAttributeUpper, &value) ||
414 GetAttribute(attribute_list, kHrefAttributeLower, &value)) {
415 if (charset.length() != 0) {
416 base::string16 url16;
417 base::CodepageToUTF16(value, charset.c_str(),
418 base::OnStringConversionError::SKIP, &url16);
419 url16 = net::UnescapeForHTML(url16);
420
421 *url = GURL(url16);
422 } else {
423 *url = GURL(value);
424 }
425 }
426
427 return true;
428 }
429
430 } // namespace internal
431
432 } // namespace bookmark_html_reader
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698