Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(380)

Side by Side Diff: chrome/browser/bookmarks/bookmark_html_reader.cc

Issue 14575004: Extract BookmarksFileImporter from Firefox2Importer. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: win fix Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/bookmarks/bookmark_html_reader.h"
6
7 #include "base/callback.h"
8 #include "base/file_util.h"
9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/string_util.h"
11 #include "base/strings/string_number_conversions.h"
12 #include "base/strings/string_split.h"
13 #include "base/time.h"
14 #include "chrome/browser/bookmarks/imported_bookmark_entry.h"
15 #include "chrome/browser/history/history_types.h"
16 #include "content/public/common/url_constants.h"
17 #include "net/base/data_url.h"
18 #include "net/base/escape.h"
19
20 namespace {
21
// Fetches the value of |attribute| from |attribute_list|, where attributes are
// written as NAME="value". Returns true and stores the text between the quotes
// in |value| on success. Returns false, leaving |value| untouched, when the
// attribute is absent or its value has no closing quote.
//
// A quote preceded by a backslash is treated as part of the value; the
// backslash itself is kept in the returned text.
bool GetAttribute(const std::string& attribute_list,
                  const std::string& attribute,
                  std::string* value) {
  const std::string opening = attribute + "=\"";

  const size_t found = attribute_list.find(opening);
  if (found == std::string::npos)
    return false;  // Can't find the attribute.

  // The value starts right after the opening quote.
  const size_t begin = found + opening.size();

  // Start scanning at |begin| itself so that an empty value (NAME="") is
  // recognized instead of running on into the next attribute. |end - 1| is
  // always valid because |begin| is at least |opening.size()|.
  size_t end = begin;
  while (end < attribute_list.size()) {
    if (attribute_list[end] == '"' && attribute_list[end - 1] != '\\')
      break;
    ++end;
  }

  if (end == attribute_list.size())
    return false;  // The value is not quoted.

  *value = attribute_list.substr(begin, end - begin);
  return true;
}
50
51 // Given the URL of a page and a favicon data URL, adds an appropriate record
52 // to the given favicon usage vector.
53 void DataURLToFaviconUsage(
54 const GURL& link_url,
55 const GURL& favicon_data,
56 std::vector<history::ImportedFaviconUsage>* favicons) {
57 if (!link_url.is_valid() || !favicon_data.is_valid() ||
58 !favicon_data.SchemeIs(chrome::kDataScheme))
59 return;
60
61 // Parse the data URL.
62 std::string mime_type, char_set, data;
63 if (!net::DataURL::Parse(favicon_data, &mime_type, &char_set, &data) ||
64 data.empty())
65 return;
66
67 history::ImportedFaviconUsage usage;
68 usage.png_data.assign(data.begin(), data.end());
69
70 // We need to make up a URL for the favicon. We use a version of the page's
71 // URL so that we can be sure it will not collide.
72 usage.favicon_url = GURL(std::string("made-up-favicon:") + link_url.spec());
73
74 // We only have one URL per favicon for Firefox 2 bookmarks.
75 usage.urls.insert(link_url);
76
77 favicons->push_back(usage);
78 }
79
80 } // namespace
81
82 namespace bookmark_html_reader {
83
84 void ImportBookmarksFile(base::Callback<bool(void)>* cancellation_callback,
85 base::Callback<bool(const GURL&)>* valid_url_callback,
86 const base::FilePath& file_path,
87 std::vector<ImportedBookmarkEntry>* bookmarks,
88 std::vector<history::ImportedFaviconUsage>* favicons) {
89 std::string content;
90 file_util::ReadFileToString(file_path, &content);
91 std::vector<std::string> lines;
92 base::SplitString(content, '\n', &lines);
93
94 base::string16 last_folder;
95 bool last_folder_on_toolbar = false;
96 bool last_folder_is_empty = true;
97 bool has_subfolder = false;
98 base::Time last_folder_add_date;
99 std::vector<base::string16> path;
100 size_t toolbar_folder_index = 0;
101 std::string charset;
102 for (size_t i = 0;
103 i < lines.size() &&
104 (!cancellation_callback || !cancellation_callback->Run());
105 ++i) {
106 std::string line;
107 TrimString(lines[i], " ", &line);
108
109 // Get the encoding of the bookmark file.
110 if (exposed_for_testing::ParseCharsetFromLine(line, &charset))
111 continue;
112
113 // Get the folder name.
114 if (exposed_for_testing::ParseFolderNameFromLine(line,
115 charset,
116 &last_folder,
117 &last_folder_on_toolbar,
118 &last_folder_add_date)) {
119 continue;
120 }
121
122 // Get the bookmark entry.
123 base::string16 title;
124 base::string16 shortcut;
125 GURL url, favicon;
126 base::Time add_date;
127 base::string16 post_data;
128 bool is_bookmark;
129 // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based
130 // keywords yet.
131 is_bookmark =
132 exposed_for_testing::ParseBookmarkFromLine(line, charset, &title,
133 &url, &favicon, &shortcut,
134 &add_date, &post_data) ||
135 exposed_for_testing::ParseMinimumBookmarkFromLine(line, charset,
136 &title, &url);
137
138 if (is_bookmark)
139 last_folder_is_empty = false;
140
141 if (is_bookmark &&
142 post_data.empty() &&
143 (!valid_url_callback || valid_url_callback->Run(url))) {
144 if (toolbar_folder_index > path.size() && !path.empty()) {
145 NOTREACHED(); // error in parsing.
146 break;
147 }
148
149 ImportedBookmarkEntry entry;
150 entry.creation_time = add_date;
151 entry.url = url;
152 entry.title = title;
153
154 if (toolbar_folder_index) {
155 // The toolbar folder should be at the top level.
156 entry.in_toolbar = true;
157 entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end());
158 } else {
159 // Add this bookmark to the list of |bookmarks|.
160 if (!has_subfolder && !last_folder.empty()) {
161 path.push_back(last_folder);
162 last_folder.clear();
163 }
164 entry.path.assign(path.begin(), path.end());
165 }
166 bookmarks->push_back(entry);
167
168 // Save the favicon. DataURLToFaviconUsage will handle the case where
169 // there is no favicon.
170 if (favicons)
171 DataURLToFaviconUsage(url, favicon, favicons);
172
173 continue;
174 }
175
176 // Bookmarks in sub-folder are encapsulated with <DL> tag.
177 if (StartsWithASCII(line, "<DL>", false)) {
178 has_subfolder = true;
179 if (!last_folder.empty()) {
180 path.push_back(last_folder);
181 last_folder.clear();
182 }
183 if (last_folder_on_toolbar && !toolbar_folder_index)
184 toolbar_folder_index = path.size();
185
186 // Mark next folder empty as initial state.
187 last_folder_is_empty = true;
188 } else if (StartsWithASCII(line, "</DL>", false)) {
189 if (path.empty())
190 break; // Mismatch <DL>.
191
192 base::string16 folder_title = path.back();
193 path.pop_back();
194
195 if (last_folder_is_empty) {
196 // Empty folder should be added explicitly.
197 ImportedBookmarkEntry entry;
198 entry.is_folder = true;
199 entry.creation_time = last_folder_add_date;
200 entry.title = folder_title;
201 if (toolbar_folder_index) {
202 // The toolbar folder should be at the top level.
203 // Make sure we don't add the toolbar folder itself if it is empty.
204 if (toolbar_folder_index <= path.size()) {
205 entry.in_toolbar = true;
206 entry.path.assign(path.begin() + toolbar_folder_index - 1,
207 path.end());
208 bookmarks->push_back(entry);
209 }
210 } else {
211 // Add this folder to the list of |bookmarks|.
212 entry.path.assign(path.begin(), path.end());
213 bookmarks->push_back(entry);
214 }
215
216 // Parent folder include current one, so it's not empty.
217 last_folder_is_empty = false;
218 }
219
220 if (toolbar_folder_index > path.size())
221 toolbar_folder_index = 0;
222 }
223 }
224 }
225
226 namespace exposed_for_testing {
227
228 bool ParseCharsetFromLine(const std::string& line, std::string* charset) {
229 const char kCharset[] = "charset=";
230 if (StartsWithASCII(line, "<META", false) &&
231 (line.find("CONTENT=\"") != std::string::npos ||
232 line.find("content=\"") != std::string::npos)) {
233 size_t begin = line.find(kCharset);
234 if (begin == std::string::npos)
235 return false;
236 begin += std::string(kCharset).size();
237 size_t end = line.find_first_of('\"', begin);
238 *charset = line.substr(begin, end - begin);
239 return true;
240 }
241 return false;
242 }
243
244 bool ParseFolderNameFromLine(const std::string& line,
245 const std::string& charset,
246 base::string16* folder_name,
247 bool* is_toolbar_folder,
248 base::Time* add_date) {
249 const char kFolderOpen[] = "<DT><H3";
250 const char kFolderClose[] = "</H3>";
251 const char kToolbarFolderAttribute[] = "PERSONAL_TOOLBAR_FOLDER";
252 const char kAddDateAttribute[] = "ADD_DATE";
253
254 if (!StartsWithASCII(line, kFolderOpen, true))
255 return false;
256
257 size_t end = line.find(kFolderClose);
258 size_t tag_end = line.rfind('>', end) + 1;
259 // If no end tag or start tag is broken, we skip to find the folder name.
260 if (end == std::string::npos || tag_end < arraysize(kFolderOpen))
261 return false;
262
263 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
264 base::OnStringConversionError::SKIP, folder_name);
265 *folder_name = net::UnescapeForHTML(*folder_name);
266
267 std::string attribute_list = line.substr(arraysize(kFolderOpen),
268 tag_end - arraysize(kFolderOpen) - 1);
269 std::string value;
270
271 // Add date
272 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) {
273 int64 time;
274 base::StringToInt64(value, &time);
275 // Upper bound it at 32 bits.
276 if (0 < time && time < (1LL << 32))
277 *add_date = base::Time::FromTimeT(time);
278 }
279
280 if (GetAttribute(attribute_list, kToolbarFolderAttribute, &value) &&
281 LowerCaseEqualsASCII(value, "true"))
282 *is_toolbar_folder = true;
283 else
284 *is_toolbar_folder = false;
285
286 return true;
287 }
288
289 bool ParseBookmarkFromLine(const std::string& line,
290 const std::string& charset,
291 base::string16* title,
292 GURL* url,
293 GURL* favicon,
294 base::string16* shortcut,
295 base::Time* add_date,
296 base::string16* post_data) {
297 const char kItemOpen[] = "<DT><A";
298 const char kItemClose[] = "</A>";
299 const char kFeedURLAttribute[] = "FEEDURL";
300 const char kHrefAttribute[] = "HREF";
301 const char kIconAttribute[] = "ICON";
302 const char kShortcutURLAttribute[] = "SHORTCUTURL";
303 const char kAddDateAttribute[] = "ADD_DATE";
304 const char kPostDataAttribute[] = "POST_DATA";
305
306 title->clear();
307 *url = GURL();
308 *favicon = GURL();
309 shortcut->clear();
310 post_data->clear();
311 *add_date = base::Time();
312
313 if (!StartsWithASCII(line, kItemOpen, true))
314 return false;
315
316 size_t end = line.find(kItemClose);
317 size_t tag_end = line.rfind('>', end) + 1;
318 if (end == std::string::npos || tag_end < arraysize(kItemOpen))
319 return false; // No end tag or start tag is broken.
320
321 std::string attribute_list = line.substr(arraysize(kItemOpen),
322 tag_end - arraysize(kItemOpen) - 1);
323
324 // We don't import Live Bookmark folders, which is Firefox's RSS reading
325 // feature, since the user never necessarily bookmarked them and we don't
326 // have this feature to update their contents.
327 std::string value;
328 if (GetAttribute(attribute_list, kFeedURLAttribute, &value))
329 return false;
330
331 // Title
332 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
333 base::OnStringConversionError::SKIP, title);
334 *title = net::UnescapeForHTML(*title);
335
336 // URL
337 if (GetAttribute(attribute_list, kHrefAttribute, &value)) {
338 base::string16 url16;
339 base::CodepageToUTF16(value, charset.c_str(),
340 base::OnStringConversionError::SKIP, &url16);
341 url16 = net::UnescapeForHTML(url16);
342
343 *url = GURL(url16);
344 }
345
346 // Favicon
347 if (GetAttribute(attribute_list, kIconAttribute, &value))
348 *favicon = GURL(value);
349
350 // Keyword
351 if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) {
352 base::CodepageToUTF16(value, charset.c_str(),
353 base::OnStringConversionError::SKIP, shortcut);
354 *shortcut = net::UnescapeForHTML(*shortcut);
355 }
356
357 // Add date
358 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) {
359 int64 time;
360 base::StringToInt64(value, &time);
361 // Upper bound it at 32 bits.
362 if (0 < time && time < (1LL << 32))
363 *add_date = base::Time::FromTimeT(time);
364 }
365
366 // Post data.
367 if (GetAttribute(attribute_list, kPostDataAttribute, &value)) {
368 base::CodepageToUTF16(value, charset.c_str(),
369 base::OnStringConversionError::SKIP, post_data);
370 *post_data = net::UnescapeForHTML(*post_data);
371 }
372
373 return true;
374 }
375
376 bool ParseMinimumBookmarkFromLine(const std::string& line,
377 const std::string& charset,
378 base::string16* title,
379 GURL* url) {
380 const char kItemOpen[] = "<DT><A";
381 const char kItemClose[] = "</";
382 const char kHrefAttributeUpper[] = "HREF";
383 const char kHrefAttributeLower[] = "href";
384
385 title->clear();
386 *url = GURL();
387
388 // Case-insensitive check of open tag.
389 if (!StartsWithASCII(line, kItemOpen, false))
390 return false;
391
392 // Find any close tag.
393 size_t end = line.find(kItemClose);
394 size_t tag_end = line.rfind('>', end) + 1;
395 if (end == std::string::npos || tag_end < arraysize(kItemOpen))
396 return false; // No end tag or start tag is broken.
397
398 std::string attribute_list = line.substr(arraysize(kItemOpen),
399 tag_end - arraysize(kItemOpen) - 1);
400
401 // Title
402 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
403 base::OnStringConversionError::SKIP, title);
404 *title = net::UnescapeForHTML(*title);
405
406 // URL
407 std::string value;
408 if (GetAttribute(attribute_list, kHrefAttributeUpper, &value) ||
409 GetAttribute(attribute_list, kHrefAttributeLower, &value)) {
410 if (charset.length() != 0) {
411 base::string16 url16;
412 base::CodepageToUTF16(value, charset.c_str(),
413 base::OnStringConversionError::SKIP, &url16);
414 url16 = net::UnescapeForHTML(url16);
415
416 *url = GURL(url16);
417 } else {
418 *url = GURL(value);
419 }
420 }
421
422 return true;
423 }
424
425 } // namespace exposed_for_testing
426
427 } // namespace bookmark_html_reader
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698