| OLD | NEW |
| (Empty) | |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/browser/bookmarks/bookmark_html_reader.h" |
| 6 |
| 7 #include "base/callback.h" |
| 8 #include "base/file_util.h" |
| 9 #include "base/i18n/icu_string_conversions.h" |
| 10 #include "base/string_util.h" |
| 11 #include "base/strings/string_number_conversions.h" |
| 12 #include "base/strings/string_split.h" |
| 13 #include "base/time.h" |
| 14 #include "chrome/browser/bookmarks/imported_bookmark_entry.h" |
| 15 #include "chrome/browser/favicon/favicon_util.h" |
| 16 #include "chrome/browser/favicon/imported_favicon_usage.h" |
| 17 #include "content/public/common/url_constants.h" |
| 18 #include "googleurl/src/gurl.h" |
| 19 #include "net/base/data_url.h" |
| 20 #include "net/base/escape.h" |
| 21 |
| 22 namespace { |
| 23 |
// Fetches the given |attribute| value from the |attribute_list|. Returns true
// if successful, and |value| will contain the value.
//
// The attribute has to be written as NAME="value". A double quote that is
// directly preceded by a backslash does not terminate the value. An empty
// value (NAME="") succeeds and yields an empty |value|. When the attribute is
// absent or its closing quote is missing, false is returned and |value| is
// left untouched.
bool GetAttribute(const std::string& attribute_list,
                  const std::string& attribute,
                  std::string* value) {
  const char kQuote[] = "\"";

  size_t begin = attribute_list.find(attribute + "=" + kQuote);
  if (begin == std::string::npos)
    return false;  // Can't find the attribute.

  // Skip over NAME=" so that |begin| indexes the first character of the value.
  begin += attribute.size() + 2;

  // Scan from |begin| itself: for an empty value the closing quote sits
  // exactly there. |begin| is at least 2 here, so |end - 1| never underflows;
  // at |end == begin| it refers to the opening quote, which is not a
  // backslash.
  size_t end = begin;
  while (end < attribute_list.size()) {
    if (attribute_list[end] == '"' && attribute_list[end - 1] != '\\')
      break;
    ++end;
  }

  if (end == attribute_list.size())
    return false;  // The value is not quoted.

  *value = attribute_list.substr(begin, end - begin);
  return true;
}
| 52 |
| 53 // Given the URL of a page and a favicon data URL, adds an appropriate record |
| 54 // to the given favicon usage vector. |
| 55 void DataURLToFaviconUsage( |
| 56 const GURL& link_url, |
| 57 const GURL& favicon_data, |
| 58 std::vector<ImportedFaviconUsage>* favicons) { |
| 59 if (!link_url.is_valid() || !favicon_data.is_valid() || |
| 60 !favicon_data.SchemeIs(chrome::kDataScheme)) |
| 61 return; |
| 62 |
| 63 // Parse the data URL. |
| 64 std::string mime_type, char_set, data; |
| 65 if (!net::DataURL::Parse(favicon_data, &mime_type, &char_set, &data) || |
| 66 data.empty()) |
| 67 return; |
| 68 |
| 69 ImportedFaviconUsage usage; |
| 70 if (!FaviconUtil::ReencodeFavicon( |
| 71 reinterpret_cast<const unsigned char*>(&data[0]), |
| 72 data.size(), &usage.png_data)) |
| 73 return; // Unable to decode. |
| 74 |
| 75 // We need to make up a URL for the favicon. We use a version of the page's |
| 76 // URL so that we can be sure it will not collide. |
| 77 usage.favicon_url = GURL(std::string("made-up-favicon:") + link_url.spec()); |
| 78 |
| 79 // We only have one URL per favicon for Firefox 2 bookmarks. |
| 80 usage.urls.insert(link_url); |
| 81 |
| 82 favicons->push_back(usage); |
| 83 } |
| 84 |
| 85 } // namespace |
| 86 |
| 87 namespace bookmark_html_reader { |
| 88 |
// Reads the bookmark HTML file at |file_path| and appends the bookmarks and
// empty folders found in it to |bookmarks|. The file is processed line by
// line (split on '\n'), each line being trimmed of spaces first.
//
// |cancellation_callback|: when non-null it is run before every line; parsing
//     stops as soon as it returns true.
// |valid_url_callback|: when non-null a bookmark is only imported if the
//     callback returns true for its URL.
// |favicons|: may be null; when non-null, favicon data found on imported
//     bookmarks is handed to DataURLToFaviconUsage() together with it.
//
// Bookmarks that carry POST data are not imported.
void ImportBookmarksFile(
    const base::Callback<bool(void)>& cancellation_callback,
    const base::Callback<bool(const GURL&)>& valid_url_callback,
    const base::FilePath& file_path,
    std::vector<ImportedBookmarkEntry>* bookmarks,
    std::vector<ImportedFaviconUsage>* favicons) {
  // The return value is not checked: if nothing was read, |content| stays
  // empty and the loop below finds nothing to import.
  std::string content;
  file_util::ReadFileToString(file_path, &content);
  std::vector<std::string> lines;
  base::SplitString(content, '\n', &lines);

  // Parser state carried from line to line.
  // |last_folder| is the most recently parsed folder title that has not yet
  // been pushed onto |path|; |path| is the stack of currently open folders.
  base::string16 last_folder;
  bool last_folder_on_toolbar = false;
  bool last_folder_is_empty = true;
  bool has_subfolder = false;
  base::Time last_folder_add_date;
  std::vector<base::string16> path;
  // 1-based position in |path| of the toolbar folder while we are inside it,
  // 0 otherwise.
  size_t toolbar_folder_index = 0;
  std::string charset;
  for (size_t i = 0;
       i < lines.size() &&
           (cancellation_callback.is_null() || !cancellation_callback.Run());
       ++i) {
    std::string line;
    TrimString(lines[i], " ", &line);

    // Get the encoding of the bookmark file.
    if (internal::ParseCharsetFromLine(line, &charset))
      continue;

    // Get the folder name.
    if (internal::ParseFolderNameFromLine(line,
                                          charset,
                                          &last_folder,
                                          &last_folder_on_toolbar,
                                          &last_folder_add_date)) {
      continue;
    }

    // Get the bookmark entry.
    base::string16 title;
    base::string16 shortcut;
    GURL url, favicon;
    base::Time add_date;
    base::string16 post_data;
    bool is_bookmark;
    // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based
    // keywords yet.
    // The full parser is tried first; the minimal parser is the fallback for
    // lines it rejects.
    // NOTE(review): a line rejected by ParseBookmarkFromLine because of its
    // FEEDURL attribute looks like it can still be accepted by
    // ParseMinimumBookmarkFromLine -- confirm whether that is intended.
    is_bookmark =
        internal::ParseBookmarkFromLine(line, charset, &title,
                                        &url, &favicon, &shortcut,
                                        &add_date, &post_data) ||
        internal::ParseMinimumBookmarkFromLine(line, charset, &title, &url);

    // Any bookmark line, imported or not, makes the current folder non-empty.
    if (is_bookmark)
      last_folder_is_empty = false;

    if (is_bookmark &&
        post_data.empty() &&
        (valid_url_callback.is_null() || valid_url_callback.Run(url))) {
      if (toolbar_folder_index > path.size() && !path.empty()) {
        NOTREACHED();  // error in parsing.
        break;
      }

      ImportedBookmarkEntry entry;
      entry.creation_time = add_date;
      entry.url = url;
      entry.title = title;

      if (toolbar_folder_index) {
        // The toolbar folder should be at the top level.
        // The entry's path therefore starts at the toolbar folder itself and
        // drops every folder above it.
        entry.in_toolbar = true;
        entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end());
      } else {
        // Add this bookmark to the list of |bookmarks|.
        // If no <DL> has been seen yet, the pending folder title is pushed
        // onto |path| here instead of in the <DL> handling below.
        if (!has_subfolder && !last_folder.empty()) {
          path.push_back(last_folder);
          last_folder.clear();
        }
        entry.path.assign(path.begin(), path.end());
      }
      bookmarks->push_back(entry);

      // Save the favicon. DataURLToFaviconUsage will handle the case where
      // there is no favicon.
      if (favicons)
        DataURLToFaviconUsage(url, favicon, favicons);

      continue;
    }

    // Bookmarks in sub-folder are encapsulated with <DL> tag.
    if (StartsWithASCII(line, "<DL>", false)) {
      // Entering a folder: the pending folder title becomes the innermost
      // element of |path|.
      has_subfolder = true;
      if (!last_folder.empty()) {
        path.push_back(last_folder);
        last_folder.clear();
      }
      // Remember where the toolbar folder sits in |path| the first time one
      // is entered.
      if (last_folder_on_toolbar && !toolbar_folder_index)
        toolbar_folder_index = path.size();

      // Mark next folder empty as initial state.
      last_folder_is_empty = true;
    } else if (StartsWithASCII(line, "</DL>", false)) {
      if (path.empty())
        break;  // Mismatch <DL>.

      // Leaving a folder: pop it, keeping its title in case it has to be
      // emitted as an empty folder.
      base::string16 folder_title = path.back();
      path.pop_back();

      if (last_folder_is_empty) {
        // Empty folder should be added explicitly.
        ImportedBookmarkEntry entry;
        entry.is_folder = true;
        entry.creation_time = last_folder_add_date;
        entry.title = folder_title;
        if (toolbar_folder_index) {
          // The toolbar folder should be at the top level.
          // Make sure we don't add the toolbar folder itself if it is empty.
          if (toolbar_folder_index <= path.size()) {
            entry.in_toolbar = true;
            entry.path.assign(path.begin() + toolbar_folder_index - 1,
                              path.end());
            bookmarks->push_back(entry);
          }
        } else {
          // Add this folder to the list of |bookmarks|.
          entry.path.assign(path.begin(), path.end());
          bookmarks->push_back(entry);
        }

        // Parent folder include current one, so it's not empty.
        last_folder_is_empty = false;
      }

      // Once the toolbar folder itself has been popped we are outside it.
      if (toolbar_folder_index > path.size())
        toolbar_folder_index = 0;
    }
  }
}
| 230 |
| 231 namespace internal { |
| 232 |
| 233 bool ParseCharsetFromLine(const std::string& line, std::string* charset) { |
| 234 const char kCharset[] = "charset="; |
| 235 if (StartsWithASCII(line, "<META", false) && |
| 236 (line.find("CONTENT=\"") != std::string::npos || |
| 237 line.find("content=\"") != std::string::npos)) { |
| 238 size_t begin = line.find(kCharset); |
| 239 if (begin == std::string::npos) |
| 240 return false; |
| 241 begin += std::string(kCharset).size(); |
| 242 size_t end = line.find_first_of('\"', begin); |
| 243 *charset = line.substr(begin, end - begin); |
| 244 return true; |
| 245 } |
| 246 return false; |
| 247 } |
| 248 |
| 249 bool ParseFolderNameFromLine(const std::string& line, |
| 250 const std::string& charset, |
| 251 base::string16* folder_name, |
| 252 bool* is_toolbar_folder, |
| 253 base::Time* add_date) { |
| 254 const char kFolderOpen[] = "<DT><H3"; |
| 255 const char kFolderClose[] = "</H3>"; |
| 256 const char kToolbarFolderAttribute[] = "PERSONAL_TOOLBAR_FOLDER"; |
| 257 const char kAddDateAttribute[] = "ADD_DATE"; |
| 258 |
| 259 if (!StartsWithASCII(line, kFolderOpen, true)) |
| 260 return false; |
| 261 |
| 262 size_t end = line.find(kFolderClose); |
| 263 size_t tag_end = line.rfind('>', end) + 1; |
| 264 // If no end tag or start tag is broken, we skip to find the folder name. |
| 265 if (end == std::string::npos || tag_end < arraysize(kFolderOpen)) |
| 266 return false; |
| 267 |
| 268 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), |
| 269 base::OnStringConversionError::SKIP, folder_name); |
| 270 *folder_name = net::UnescapeForHTML(*folder_name); |
| 271 |
| 272 std::string attribute_list = line.substr(arraysize(kFolderOpen), |
| 273 tag_end - arraysize(kFolderOpen) - 1); |
| 274 std::string value; |
| 275 |
| 276 // Add date |
| 277 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { |
| 278 int64 time; |
| 279 base::StringToInt64(value, &time); |
| 280 // Upper bound it at 32 bits. |
| 281 if (0 < time && time < (1LL << 32)) |
| 282 *add_date = base::Time::FromTimeT(time); |
| 283 } |
| 284 |
| 285 if (GetAttribute(attribute_list, kToolbarFolderAttribute, &value) && |
| 286 LowerCaseEqualsASCII(value, "true")) |
| 287 *is_toolbar_folder = true; |
| 288 else |
| 289 *is_toolbar_folder = false; |
| 290 |
| 291 return true; |
| 292 } |
| 293 |
| 294 bool ParseBookmarkFromLine(const std::string& line, |
| 295 const std::string& charset, |
| 296 base::string16* title, |
| 297 GURL* url, |
| 298 GURL* favicon, |
| 299 base::string16* shortcut, |
| 300 base::Time* add_date, |
| 301 base::string16* post_data) { |
| 302 const char kItemOpen[] = "<DT><A"; |
| 303 const char kItemClose[] = "</A>"; |
| 304 const char kFeedURLAttribute[] = "FEEDURL"; |
| 305 const char kHrefAttribute[] = "HREF"; |
| 306 const char kIconAttribute[] = "ICON"; |
| 307 const char kShortcutURLAttribute[] = "SHORTCUTURL"; |
| 308 const char kAddDateAttribute[] = "ADD_DATE"; |
| 309 const char kPostDataAttribute[] = "POST_DATA"; |
| 310 |
| 311 title->clear(); |
| 312 *url = GURL(); |
| 313 *favicon = GURL(); |
| 314 shortcut->clear(); |
| 315 post_data->clear(); |
| 316 *add_date = base::Time(); |
| 317 |
| 318 if (!StartsWithASCII(line, kItemOpen, true)) |
| 319 return false; |
| 320 |
| 321 size_t end = line.find(kItemClose); |
| 322 size_t tag_end = line.rfind('>', end) + 1; |
| 323 if (end == std::string::npos || tag_end < arraysize(kItemOpen)) |
| 324 return false; // No end tag or start tag is broken. |
| 325 |
| 326 std::string attribute_list = line.substr(arraysize(kItemOpen), |
| 327 tag_end - arraysize(kItemOpen) - 1); |
| 328 |
| 329 // We don't import Live Bookmark folders, which is Firefox's RSS reading |
| 330 // feature, since the user never necessarily bookmarked them and we don't |
| 331 // have this feature to update their contents. |
| 332 std::string value; |
| 333 if (GetAttribute(attribute_list, kFeedURLAttribute, &value)) |
| 334 return false; |
| 335 |
| 336 // Title |
| 337 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), |
| 338 base::OnStringConversionError::SKIP, title); |
| 339 *title = net::UnescapeForHTML(*title); |
| 340 |
| 341 // URL |
| 342 if (GetAttribute(attribute_list, kHrefAttribute, &value)) { |
| 343 base::string16 url16; |
| 344 base::CodepageToUTF16(value, charset.c_str(), |
| 345 base::OnStringConversionError::SKIP, &url16); |
| 346 url16 = net::UnescapeForHTML(url16); |
| 347 |
| 348 *url = GURL(url16); |
| 349 } |
| 350 |
| 351 // Favicon |
| 352 if (GetAttribute(attribute_list, kIconAttribute, &value)) |
| 353 *favicon = GURL(value); |
| 354 |
| 355 // Keyword |
| 356 if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) { |
| 357 base::CodepageToUTF16(value, charset.c_str(), |
| 358 base::OnStringConversionError::SKIP, shortcut); |
| 359 *shortcut = net::UnescapeForHTML(*shortcut); |
| 360 } |
| 361 |
| 362 // Add date |
| 363 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { |
| 364 int64 time; |
| 365 base::StringToInt64(value, &time); |
| 366 // Upper bound it at 32 bits. |
| 367 if (0 < time && time < (1LL << 32)) |
| 368 *add_date = base::Time::FromTimeT(time); |
| 369 } |
| 370 |
| 371 // Post data. |
| 372 if (GetAttribute(attribute_list, kPostDataAttribute, &value)) { |
| 373 base::CodepageToUTF16(value, charset.c_str(), |
| 374 base::OnStringConversionError::SKIP, post_data); |
| 375 *post_data = net::UnescapeForHTML(*post_data); |
| 376 } |
| 377 |
| 378 return true; |
| 379 } |
| 380 |
| 381 bool ParseMinimumBookmarkFromLine(const std::string& line, |
| 382 const std::string& charset, |
| 383 base::string16* title, |
| 384 GURL* url) { |
| 385 const char kItemOpen[] = "<DT><A"; |
| 386 const char kItemClose[] = "</"; |
| 387 const char kHrefAttributeUpper[] = "HREF"; |
| 388 const char kHrefAttributeLower[] = "href"; |
| 389 |
| 390 title->clear(); |
| 391 *url = GURL(); |
| 392 |
| 393 // Case-insensitive check of open tag. |
| 394 if (!StartsWithASCII(line, kItemOpen, false)) |
| 395 return false; |
| 396 |
| 397 // Find any close tag. |
| 398 size_t end = line.find(kItemClose); |
| 399 size_t tag_end = line.rfind('>', end) + 1; |
| 400 if (end == std::string::npos || tag_end < arraysize(kItemOpen)) |
| 401 return false; // No end tag or start tag is broken. |
| 402 |
| 403 std::string attribute_list = line.substr(arraysize(kItemOpen), |
| 404 tag_end - arraysize(kItemOpen) - 1); |
| 405 |
| 406 // Title |
| 407 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), |
| 408 base::OnStringConversionError::SKIP, title); |
| 409 *title = net::UnescapeForHTML(*title); |
| 410 |
| 411 // URL |
| 412 std::string value; |
| 413 if (GetAttribute(attribute_list, kHrefAttributeUpper, &value) || |
| 414 GetAttribute(attribute_list, kHrefAttributeLower, &value)) { |
| 415 if (charset.length() != 0) { |
| 416 base::string16 url16; |
| 417 base::CodepageToUTF16(value, charset.c_str(), |
| 418 base::OnStringConversionError::SKIP, &url16); |
| 419 url16 = net::UnescapeForHTML(url16); |
| 420 |
| 421 *url = GURL(url16); |
| 422 } else { |
| 423 *url = GURL(value); |
| 424 } |
| 425 } |
| 426 |
| 427 return true; |
| 428 } |
| 429 |
| 430 } // namespace internal |
| 431 |
| 432 } // namespace bookmark_html_reader |
| OLD | NEW |