Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(391)

Unified Diff: chrome/browser/bookmarks/bookmark_html_reader.cc

Issue 14575004: Extract BookmarksFileImporter from Firefox2Importer. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: move stuff to favicon Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/bookmarks/bookmark_html_reader.cc
diff --git a/chrome/browser/bookmarks/bookmark_html_reader.cc b/chrome/browser/bookmarks/bookmark_html_reader.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9c48d4c9b6a5933e23caa5bb748c6f1591e88753
--- /dev/null
+++ b/chrome/browser/bookmarks/bookmark_html_reader.cc
@@ -0,0 +1,431 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/bookmarks/bookmark_html_reader.h"
+
+#include "base/callback.h"
+#include "base/file_util.h"
+#include "base/i18n/icu_string_conversions.h"
+#include "base/string_util.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_split.h"
+#include "base/time.h"
+#include "chrome/browser/bookmarks/imported_bookmark_entry.h"
+#include "chrome/browser/favicon/favicon_util.h"
+#include "chrome/browser/favicon/imported_favicon_usage.h"
+#include "content/public/common/url_constants.h"
+#include "googleurl/src/gurl.h"
+#include "net/base/data_url.h"
+#include "net/base/escape.h"
+
+namespace {
+
+// Looks up |attribute| in |attribute_list| and copies its double-quoted value
+// into |value|.
+//
+// Only the exact, case-sensitive form NAME="value" is recognized. A quote that
+// directly follows a backslash does not terminate the value, and the backslash
+// is kept in the returned text. An empty value (NAME="") yields an empty
+// string. Returns false, leaving |value| untouched, when the attribute is not
+// present or no closing quote is found.
+bool GetAttribute(const std::string& attribute_list,
+                  const std::string& attribute,
+                  std::string* value) {
+  const char kQuote[] = "\"";
+
+  size_t begin = attribute_list.find(attribute + "=" + kQuote);
+  if (begin == std::string::npos)
+    return false;  // Can't find the attribute.
+
+  // Step over NAME, the '=' and the opening quote.
+  begin += attribute.size() + 2;
+
+  // Scanning has to start at |begin| itself: for an empty value the very first
+  // character after the opening quote is already the closing quote. Looking
+  // at |end - 1| is safe because |begin| is at least 2.
+  size_t end = begin;
+  while (end < attribute_list.size()) {
+    if (attribute_list[end] == '"' && attribute_list[end - 1] != '\\')
+      break;
+    ++end;
+  }
+
+  if (end >= attribute_list.size())
+    return false;  // The value is not quoted.
+
+  *value = attribute_list.substr(begin, end - begin);
+  return true;
+}
+
+// Turns a favicon supplied as a data: URL into an ImportedFaviconUsage record
+// for the page |link_url| and appends it to |favicons|. Nothing is appended
+// when either URL is invalid, |favicon_data| does not use the data scheme, the
+// data URL cannot be parsed or has an empty payload, or ReencodeFavicon()
+// rejects the payload.
+void DataURLToFaviconUsage(
+    const GURL& link_url,
+    const GURL& favicon_data,
+    std::vector<ImportedFaviconUsage>* favicons) {
+  if (!link_url.is_valid())
+    return;
+  if (!favicon_data.is_valid())
+    return;
+  if (!favicon_data.SchemeIs(chrome::kDataScheme))
+    return;
+
+  // Split the data URL into its parts; only the payload is used afterwards.
+  std::string mime_type;
+  std::string char_set;
+  std::string payload;
+  const bool parsed =
+      net::DataURL::Parse(favicon_data, &mime_type, &char_set, &payload);
+  if (!parsed || payload.empty())
+    return;
+
+  ImportedFaviconUsage usage;
+  const unsigned char* raw_bytes =
+      reinterpret_cast<const unsigned char*>(&payload[0]);
+  const bool reencoded =
+      FaviconUtil::ReencodeFavicon(raw_bytes, payload.size(), &usage.png_data);
+  if (!reencoded)
+    return;  // Unable to decode.
+
+  // The bookmark file gives no real favicon URL, so one is synthesized from
+  // the page's own URL to keep it from colliding with other entries.
+  const std::string made_up_spec =
+      std::string("made-up-favicon:") + link_url.spec();
+  usage.favicon_url = GURL(made_up_spec);
+
+  // Each record built here maps the favicon to exactly one page URL.
+  usage.urls.insert(link_url);
+
+  favicons->push_back(usage);
+}
+
+} // namespace
+
+namespace bookmark_html_reader {
+
+// Reads the bookmark HTML file at |file_path| and walks it line by line,
+// appending imported bookmarks and explicit entries for empty folders to
+// |bookmarks|. When |favicons| is non-NULL, a favicon record is attempted for
+// every imported bookmark.
+//
+// Both callbacks may be NULL. |cancellation_callback| is run once before each
+// line and ends the walk when it returns true. |valid_url_callback|, when
+// set, must accept a bookmark's URL for that bookmark to be imported.
+void ImportBookmarksFile(base::Callback<bool(void)>* cancellation_callback,
+                         base::Callback<bool(const GURL&)>* valid_url_callback,
+                         const base::FilePath& file_path,
+                         std::vector<ImportedBookmarkEntry>* bookmarks,
+                         std::vector<ImportedFaviconUsage>* favicons) {
+  std::string file_contents;
+  file_util::ReadFileToString(file_path, &file_contents);
+  std::vector<std::string> raw_lines;
+  base::SplitString(file_contents, '\n', &raw_lines);
+
+  // Data from the most recent folder heading. The name stays here until it is
+  // pushed onto |folder_path|.
+  base::string16 pending_folder;
+  bool pending_folder_on_toolbar = false;
+  base::Time pending_folder_add_date;
+  // Set to true at every <DL>; cleared when a bookmark line is seen or when
+  // an empty sub-folder is closed.
+  bool open_folder_is_empty = true;
+  // Set by the first <DL> line and never cleared.
+  bool seen_dl_tag = false;
+  // Titles of the folders enclosing the current line, outermost first.
+  std::vector<base::string16> folder_path;
+  // 1-based position of the toolbar folder inside |folder_path|; 0 while no
+  // toolbar folder is open.
+  size_t toolbar_depth = 0;
+  std::string encoding;
+
+  for (size_t line_no = 0; line_no < raw_lines.size(); ++line_no) {
+    if (cancellation_callback && cancellation_callback->Run())
+      break;
+
+    std::string line;
+    TrimString(raw_lines[line_no], " ", &line);
+
+    // A <META> line announcing the file's encoding.
+    if (internal::ParseCharsetFromLine(line, &encoding))
+      continue;
+
+    // A folder heading; its data is only remembered here.
+    if (internal::ParseFolderNameFromLine(line,
+                                          encoding,
+                                          &pending_folder,
+                                          &pending_folder_on_toolbar,
+                                          &pending_folder_add_date)) {
+      continue;
+    }
+
+    // A bookmark, parsed first with the full parser and then, if that fails,
+    // with the minimal one.
+    base::string16 title;
+    base::string16 shortcut;
+    GURL url;
+    GURL favicon;
+    base::Time add_date;
+    base::string16 post_data;
+    // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based
+    // keywords yet.
+    bool is_bookmark =
+        internal::ParseBookmarkFromLine(line, encoding, &title,
+                                        &url, &favicon, &shortcut,
+                                        &add_date, &post_data);
+    if (!is_bookmark) {
+      is_bookmark =
+          internal::ParseMinimumBookmarkFromLine(line, encoding, &title, &url);
+    }
+
+    if (is_bookmark)
+      open_folder_is_empty = false;
+
+    const bool importable =
+        is_bookmark &&
+        post_data.empty() &&
+        (!valid_url_callback || valid_url_callback->Run(url));
+    if (importable) {
+      if (toolbar_depth > folder_path.size() && !folder_path.empty()) {
+        NOTREACHED();  // error in parsing.
+        break;
+      }
+
+      ImportedBookmarkEntry bookmark_entry;
+      bookmark_entry.creation_time = add_date;
+      bookmark_entry.url = url;
+      bookmark_entry.title = title;
+
+      if (toolbar_depth == 0) {
+        // Before any <DL> has been seen, a pending folder name is adopted as
+        // an enclosing folder right away.
+        if (!seen_dl_tag && !pending_folder.empty()) {
+          folder_path.push_back(pending_folder);
+          pending_folder.clear();
+        }
+        bookmark_entry.path.assign(folder_path.begin(), folder_path.end());
+      } else {
+        // Inside the toolbar folder the stored path starts at the toolbar
+        // folder itself, so that folder ends up at the top level.
+        bookmark_entry.in_toolbar = true;
+        bookmark_entry.path.assign(folder_path.begin() + toolbar_depth - 1,
+                                   folder_path.end());
+      }
+      bookmarks->push_back(bookmark_entry);
+
+      // DataURLToFaviconUsage copes with a missing favicon on its own.
+      if (favicons)
+        DataURLToFaviconUsage(url, favicon, favicons);
+
+      continue;
+    }
+
+    // <DL> opens the contents of a folder.
+    if (StartsWithASCII(line, "<DL>", false)) {
+      seen_dl_tag = true;
+      if (!pending_folder.empty()) {
+        folder_path.push_back(pending_folder);
+        pending_folder.clear();
+      }
+      if (pending_folder_on_toolbar && toolbar_depth == 0)
+        toolbar_depth = folder_path.size();
+
+      // The folder just opened starts out empty.
+      open_folder_is_empty = true;
+      continue;
+    }
+
+    // Anything other than </DL> is ignored from here on.
+    if (!StartsWithASCII(line, "</DL>", false))
+      continue;
+
+    if (folder_path.empty())
+      break;  // Mismatch <DL>.
+
+    const base::string16 closed_folder_title = folder_path.back();
+    folder_path.pop_back();
+
+    if (open_folder_is_empty) {
+      // No bookmark produced this folder implicitly, so add it explicitly.
+      ImportedBookmarkEntry folder_entry;
+      folder_entry.is_folder = true;
+      folder_entry.creation_time = pending_folder_add_date;
+      folder_entry.title = closed_folder_title;
+      if (toolbar_depth == 0) {
+        folder_entry.path.assign(folder_path.begin(), folder_path.end());
+        bookmarks->push_back(folder_entry);
+      } else if (toolbar_depth <= folder_path.size()) {
+        // The depth check skips the toolbar folder itself when it is the
+        // empty folder being closed.
+        folder_entry.in_toolbar = true;
+        folder_entry.path.assign(folder_path.begin() + toolbar_depth - 1,
+                                 folder_path.end());
+        bookmarks->push_back(folder_entry);
+      }
+
+      // The parent now contains this folder, so the parent is not empty.
+      open_folder_is_empty = false;
+    }
+
+    // Leaving the toolbar folder ends toolbar mode.
+    if (toolbar_depth > folder_path.size())
+      toolbar_depth = 0;
+  }
+}
+
+namespace internal {
+
+// Extracts the character set announced by a <META ... CONTENT="..."> line.
+// Returns true and stores the text following "charset=" (up to the next
+// double quote, or to the end of the line when there is none) in |charset|.
+// Returns false, leaving |charset| untouched, when the line does not start
+// with "<META" (any case), has neither CONTENT=" nor content=", or contains
+// no lower-case "charset=".
+bool ParseCharsetFromLine(const std::string& line, std::string* charset) {
+  const char kCharset[] = "charset=";
+
+  if (!StartsWithASCII(line, "<META", false))
+    return false;
+
+  const bool has_content_attribute =
+      line.find("CONTENT=\"") != std::string::npos ||
+      line.find("content=\"") != std::string::npos;
+  if (!has_content_attribute)
+    return false;
+
+  const size_t key_pos = line.find(kCharset);
+  if (key_pos == std::string::npos)
+    return false;
+
+  // sizeof() counts the terminating NUL, hence the -1.
+  const size_t value_begin = key_pos + (sizeof(kCharset) - 1);
+  const size_t value_end = line.find('\"', value_begin);
+  *charset = line.substr(value_begin, value_end - value_begin);
+  return true;
+}
+
+// Parses a folder heading line of the form
+//   <DT><H3 ATTRIBUTES>Folder name</H3>
+//
+// Returns false, without touching any output, when |line| does not start with
+// "<DT><H3" (case-sensitive), has no "</H3>", or its start tag is broken.
+// Otherwise returns true after setting:
+//   |folder_name|        the text before "</H3>", converted from |charset|
+//                        and HTML-unescaped;
+//   |is_toolbar_folder|  whether PERSONAL_TOOLBAR_FOLDER equals "true",
+//                        compared case-insensitively;
+//   |add_date|           the ADD_DATE attribute when it is a positive number
+//                        below 2^32, otherwise a default-constructed Time.
+bool ParseFolderNameFromLine(const std::string& line,
+                             const std::string& charset,
+                             base::string16* folder_name,
+                             bool* is_toolbar_folder,
+                             base::Time* add_date) {
+  const char kFolderOpen[] = "<DT><H3";
+  const char kFolderClose[] = "</H3>";
+  const char kToolbarFolderAttribute[] = "PERSONAL_TOOLBAR_FOLDER";
+  const char kAddDateAttribute[] = "ADD_DATE";
+
+  if (!StartsWithASCII(line, kFolderOpen, true))
+    return false;
+
+  size_t end = line.find(kFolderClose);
+  size_t tag_end = line.rfind('>', end) + 1;
+  // If no end tag or start tag is broken, we skip to find the folder name.
+  if (end == std::string::npos || tag_end < arraysize(kFolderOpen))
+    return false;
+
+  base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
+                        base::OnStringConversionError::SKIP, folder_name);
+  *folder_name = net::UnescapeForHTML(*folder_name);
+
+  // arraysize() counts the terminating NUL, so this offset also steps over
+  // the single character that separates the tag name from its attributes.
+  const size_t attributes_begin = arraysize(kFolderOpen);
+  // A tag without attributes ("<DT><H3>") ends exactly at |attributes_begin|.
+  // The length below would then wrap around and pull the folder name into
+  // the attribute list, so the list is left empty in that case.
+  std::string attribute_list;
+  if (tag_end > attributes_begin) {
+    attribute_list =
+        line.substr(attributes_begin, tag_end - attributes_begin - 1);
+  }
+  std::string value;
+
+  // Add date. Reset first so that a folder without a usable ADD_DATE does not
+  // keep whatever the caller's variable held from an earlier folder.
+  *add_date = base::Time();
+  if (GetAttribute(attribute_list, kAddDateAttribute, &value)) {
+    int64 time = 0;
+    base::StringToInt64(value, &time);
+    // Upper bound it at 32 bits.
+    if (0 < time && time < (1LL << 32))
+      *add_date = base::Time::FromTimeT(time);
+  }
+
+  *is_toolbar_folder =
+      GetAttribute(attribute_list, kToolbarFolderAttribute, &value) &&
+      LowerCaseEqualsASCII(value, "true");
+
+  return true;
+}
+
+// Parses a bookmark line of the form
+//   <DT><A HREF="..." ADD_DATE="..." ...>Title</A>
+//
+// Every output is reset before anything else happens. Returns false when
+// |line| does not start with "<DT><A" (case-sensitive), has no "</A>", has a
+// broken start tag, or carries a FEEDURL attribute (a live bookmark).
+// Otherwise returns true after filling in:
+//   |title|      the text before "</A>", converted and HTML-unescaped;
+//   |url|        HREF, converted and HTML-unescaped;
+//   |favicon|    ICON, used verbatim;
+//   |shortcut|   SHORTCUTURL, converted and HTML-unescaped;
+//   |add_date|   ADD_DATE when it is a positive number below 2^32;
+//   |post_data|  POST_DATA, converted and HTML-unescaped.
+// Outputs whose attribute is absent keep their reset value, so |url| can be
+// an empty GURL even when true is returned.
+bool ParseBookmarkFromLine(const std::string& line,
+                           const std::string& charset,
+                           base::string16* title,
+                           GURL* url,
+                           GURL* favicon,
+                           base::string16* shortcut,
+                           base::Time* add_date,
+                           base::string16* post_data) {
+  const char kItemOpen[] = "<DT><A";
+  const char kItemClose[] = "</A>";
+  const char kFeedURLAttribute[] = "FEEDURL";
+  const char kHrefAttribute[] = "HREF";
+  const char kIconAttribute[] = "ICON";
+  const char kShortcutURLAttribute[] = "SHORTCUTURL";
+  const char kAddDateAttribute[] = "ADD_DATE";
+  const char kPostDataAttribute[] = "POST_DATA";
+
+  title->clear();
+  *url = GURL();
+  *favicon = GURL();
+  shortcut->clear();
+  post_data->clear();
+  *add_date = base::Time();
+
+  if (!StartsWithASCII(line, kItemOpen, true))
+    return false;
+
+  size_t end = line.find(kItemClose);
+  size_t tag_end = line.rfind('>', end) + 1;
+  if (end == std::string::npos || tag_end < arraysize(kItemOpen))
+    return false;  // No end tag or start tag is broken.
+
+  // arraysize() counts the terminating NUL, so this offset also steps over
+  // the single character that separates the tag name from its attributes.
+  const size_t attributes_begin = arraysize(kItemOpen);
+  // A tag without attributes ("<DT><A>") ends exactly at |attributes_begin|.
+  // The length below would then wrap around and pull the title into the
+  // attribute list, so the list is left empty in that case.
+  std::string attribute_list;
+  if (tag_end > attributes_begin) {
+    attribute_list =
+        line.substr(attributes_begin, tag_end - attributes_begin - 1);
+  }
+
+  // We don't import Live Bookmark folders, which is Firefox's RSS reading
+  // feature, since the user never necessarily bookmarked them and we don't
+  // have this feature to update their contents.
+  std::string value;
+  if (GetAttribute(attribute_list, kFeedURLAttribute, &value))
+    return false;
+
+  // Title
+  base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
+                        base::OnStringConversionError::SKIP, title);
+  *title = net::UnescapeForHTML(*title);
+
+  // URL
+  if (GetAttribute(attribute_list, kHrefAttribute, &value)) {
+    base::string16 url16;
+    base::CodepageToUTF16(value, charset.c_str(),
+                          base::OnStringConversionError::SKIP, &url16);
+    url16 = net::UnescapeForHTML(url16);
+
+    *url = GURL(url16);
+  }
+
+  // Favicon
+  if (GetAttribute(attribute_list, kIconAttribute, &value))
+    *favicon = GURL(value);
+
+  // Keyword
+  if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) {
+    base::CodepageToUTF16(value, charset.c_str(),
+                          base::OnStringConversionError::SKIP, shortcut);
+    *shortcut = net::UnescapeForHTML(*shortcut);
+  }
+
+  // Add date
+  if (GetAttribute(attribute_list, kAddDateAttribute, &value)) {
+    int64 time = 0;
+    base::StringToInt64(value, &time);
+    // Upper bound it at 32 bits.
+    if (0 < time && time < (1LL << 32))
+      *add_date = base::Time::FromTimeT(time);
+  }
+
+  // Post data.
+  if (GetAttribute(attribute_list, kPostDataAttribute, &value)) {
+    base::CodepageToUTF16(value, charset.c_str(),
+                          base::OnStringConversionError::SKIP, post_data);
+    *post_data = net::UnescapeForHTML(*post_data);
+  }
+
+  return true;
+}
+
+// Lenient counterpart of ParseBookmarkFromLine() that extracts only a title
+// and a URL.
+//
+// |title| and |url| are reset first. The open tag "<DT><A" is matched in any
+// case and any "</" counts as the close tag. Returns false when the open tag
+// is missing, no "</" is found, or the start tag is broken. Otherwise returns
+// true with |title| set to the text before the close tag (converted and
+// HTML-unescaped) and |url| set from HREF or href. With a non-empty |charset|
+// the attribute value is converted and HTML-unescaped first; with an empty
+// one it is used verbatim. |url| stays an empty GURL when neither spelling of
+// the attribute is present.
+bool ParseMinimumBookmarkFromLine(const std::string& line,
+                                  const std::string& charset,
+                                  base::string16* title,
+                                  GURL* url) {
+  const char kItemOpen[] = "<DT><A";
+  const char kItemClose[] = "</";
+  const char kHrefAttributeUpper[] = "HREF";
+  const char kHrefAttributeLower[] = "href";
+
+  title->clear();
+  *url = GURL();
+
+  // Case-insensitive check of open tag.
+  if (!StartsWithASCII(line, kItemOpen, false))
+    return false;
+
+  // Find any close tag.
+  size_t end = line.find(kItemClose);
+  size_t tag_end = line.rfind('>', end) + 1;
+  if (end == std::string::npos || tag_end < arraysize(kItemOpen))
+    return false;  // No end tag or start tag is broken.
+
+  // arraysize() counts the terminating NUL, so this offset also steps over
+  // the single character that separates the tag name from its attributes.
+  const size_t attributes_begin = arraysize(kItemOpen);
+  // A tag without attributes ("<DT><A>") ends exactly at |attributes_begin|.
+  // The length below would then wrap around and pull the title into the
+  // attribute list, so the list is left empty in that case.
+  std::string attribute_list;
+  if (tag_end > attributes_begin) {
+    attribute_list =
+        line.substr(attributes_begin, tag_end - attributes_begin - 1);
+  }
+
+  // Title
+  base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(),
+                        base::OnStringConversionError::SKIP, title);
+  *title = net::UnescapeForHTML(*title);
+
+  // URL
+  std::string value;
+  if (GetAttribute(attribute_list, kHrefAttributeUpper, &value) ||
+      GetAttribute(attribute_list, kHrefAttributeLower, &value)) {
+    if (charset.length() != 0) {
+      base::string16 url16;
+      base::CodepageToUTF16(value, charset.c_str(),
+                            base::OnStringConversionError::SKIP, &url16);
+      url16 = net::UnescapeForHTML(url16);
+
+      *url = GURL(url16);
+    } else {
+      *url = GURL(value);
+    }
+  }
+
+  return true;
+}
+
+} // namespace internal
+
+} // namespace bookmark_html_reader

Powered by Google App Engine
This is Rietveld 408576698