Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/bookmarks/bookmark_html_reader.h" | |
| 6 | |
| 7 #include "base/callback.h" | |
| 8 #include "base/file_util.h" | |
| 9 #include "base/i18n/icu_string_conversions.h" | |
| 10 #include "base/string_util.h" | |
| 11 #include "base/strings/string_number_conversions.h" | |
| 12 #include "base/strings/string_split.h" | |
| 13 #include "base/time.h" | |
| 14 #include "chrome/browser/bookmarks/imported_bookmark_entry.h" | |
| 15 #include "chrome/browser/history/history_types.h" | |
| 16 #include "content/public/common/url_constants.h" | |
| 17 #include "net/base/data_url.h" | |
| 18 #include "net/base/escape.h" | |
| 19 #include "googleurl/src/gurl.h" | |
|
tfarina
2013/05/10 21:57:04
sort
Avi (use Gerrit)
2013/05/10 22:04:37
Done.
| |
| 20 | |
| 21 namespace { | |
| 22 | |
// Extracts the double-quoted value of |attribute| from |attribute_list|, which
// is expected to look like: NAME1="value1" NAME2="value2" ...
//
// Returns true and stores the text between the quotes in |value| when
// |attribute_list| contains |attribute|="..." with a closing quote. Returns
// false, leaving |value| untouched, when the attribute cannot be found or its
// value is never terminated. A quote that is preceded by a backslash does not
// terminate the value. The value is returned verbatim; escape sequences are
// not decoded.
//
// Note: the attribute name is located with a plain substring search, so a
// name that is the tail of another attribute's name can match that attribute.
bool GetAttribute(const std::string& attribute_list,
                  const std::string& attribute,
                  std::string* value) {
  const char kQuote[] = "\"";

  size_t begin = attribute_list.find(attribute + "=" + kQuote);
  if (begin == std::string::npos)
    return false;  // Can't find the attribute.

  // Skip over the attribute name, the '=' and the opening quote.
  begin += attribute.size() + 2;

  // Start scanning at the first character of the value so that an empty value
  // (NAME="") ends at its own closing quote instead of running on to a quote
  // that belongs to a later attribute. |begin| is at least 2 here, so reading
  // |end - 1| is always in range.
  size_t end = begin;
  while (end < attribute_list.size()) {
    if (attribute_list[end] == '"' && attribute_list[end - 1] != '\\')
      break;
    ++end;
  }

  if (end >= attribute_list.size())
    return false;  // The value is not quoted.

  *value = attribute_list.substr(begin, end - begin);
  return true;
}
| 51 | |
| 52 // Given the URL of a page and a favicon data URL, adds an appropriate record | |
| 53 // to the given favicon usage vector. | |
| 54 void DataURLToFaviconUsage( | |
| 55 const GURL& link_url, | |
| 56 const GURL& favicon_data, | |
| 57 std::vector<history::ImportedFaviconUsage>* favicons) { | |
| 58 if (!link_url.is_valid() || !favicon_data.is_valid() || | |
| 59 !favicon_data.SchemeIs(chrome::kDataScheme)) | |
| 60 return; | |
| 61 | |
| 62 // Parse the data URL. | |
| 63 std::string mime_type, char_set, data; | |
| 64 if (!net::DataURL::Parse(favicon_data, &mime_type, &char_set, &data) || | |
| 65 data.empty()) | |
| 66 return; | |
| 67 | |
| 68 history::ImportedFaviconUsage usage; | |
| 69 usage.png_data.assign(data.begin(), data.end()); | |
| 70 | |
| 71 // We need to make up a URL for the favicon. We use a version of the page's | |
| 72 // URL so that we can be sure it will not collide. | |
| 73 usage.favicon_url = GURL(std::string("made-up-favicon:") + link_url.spec()); | |
| 74 | |
| 75 // We only have one URL per favicon for Firefox 2 bookmarks. | |
| 76 usage.urls.insert(link_url); | |
| 77 | |
| 78 favicons->push_back(usage); | |
| 79 } | |
| 80 | |
| 81 } // namespace | |
| 82 | |
| 83 namespace bookmark_html_reader { | |
| 84 | |
| 85 void ImportBookmarksFile(base::Callback<bool(void)>* cancellation_callback, | |
| 86 base::Callback<bool(const GURL&)>* valid_url_callback, | |
| 87 const base::FilePath& file_path, | |
| 88 std::vector<ImportedBookmarkEntry>* bookmarks, | |
| 89 std::vector<history::ImportedFaviconUsage>* favicons) { | |
| 90 std::string content; | |
| 91 file_util::ReadFileToString(file_path, &content); | |
| 92 std::vector<std::string> lines; | |
| 93 base::SplitString(content, '\n', &lines); | |
| 94 | |
| 95 base::string16 last_folder; | |
| 96 bool last_folder_on_toolbar = false; | |
| 97 bool last_folder_is_empty = true; | |
| 98 bool has_subfolder = false; | |
| 99 base::Time last_folder_add_date; | |
| 100 std::vector<base::string16> path; | |
| 101 size_t toolbar_folder_index = 0; | |
| 102 std::string charset; | |
| 103 for (size_t i = 0; | |
| 104 i < lines.size() && | |
| 105 (!cancellation_callback || !cancellation_callback->Run()); | |
| 106 ++i) { | |
| 107 std::string line; | |
| 108 TrimString(lines[i], " ", &line); | |
| 109 | |
| 110 // Get the encoding of the bookmark file. | |
| 111 if (internal::ParseCharsetFromLine(line, &charset)) | |
| 112 continue; | |
| 113 | |
| 114 // Get the folder name. | |
| 115 if (internal::ParseFolderNameFromLine(line, | |
| 116 charset, | |
| 117 &last_folder, | |
| 118 &last_folder_on_toolbar, | |
| 119 &last_folder_add_date)) { | |
| 120 continue; | |
| 121 } | |
| 122 | |
| 123 // Get the bookmark entry. | |
| 124 base::string16 title; | |
| 125 base::string16 shortcut; | |
| 126 GURL url, favicon; | |
| 127 base::Time add_date; | |
| 128 base::string16 post_data; | |
| 129 bool is_bookmark; | |
| 130 // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based | |
| 131 // keywords yet. | |
| 132 is_bookmark = | |
| 133 internal::ParseBookmarkFromLine(line, charset, &title, | |
| 134 &url, &favicon, &shortcut, | |
| 135 &add_date, &post_data) || | |
| 136 internal::ParseMinimumBookmarkFromLine(line, charset, &title, &url); | |
| 137 | |
| 138 if (is_bookmark) | |
| 139 last_folder_is_empty = false; | |
| 140 | |
| 141 if (is_bookmark && | |
| 142 post_data.empty() && | |
| 143 (!valid_url_callback || valid_url_callback->Run(url))) { | |
| 144 if (toolbar_folder_index > path.size() && !path.empty()) { | |
| 145 NOTREACHED(); // error in parsing. | |
| 146 break; | |
| 147 } | |
| 148 | |
| 149 ImportedBookmarkEntry entry; | |
| 150 entry.creation_time = add_date; | |
| 151 entry.url = url; | |
| 152 entry.title = title; | |
| 153 | |
| 154 if (toolbar_folder_index) { | |
| 155 // The toolbar folder should be at the top level. | |
| 156 entry.in_toolbar = true; | |
| 157 entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end()); | |
| 158 } else { | |
| 159 // Add this bookmark to the list of |bookmarks|. | |
| 160 if (!has_subfolder && !last_folder.empty()) { | |
| 161 path.push_back(last_folder); | |
| 162 last_folder.clear(); | |
| 163 } | |
| 164 entry.path.assign(path.begin(), path.end()); | |
| 165 } | |
| 166 bookmarks->push_back(entry); | |
| 167 | |
| 168 // Save the favicon. DataURLToFaviconUsage will handle the case where | |
| 169 // there is no favicon. | |
| 170 if (favicons) | |
| 171 DataURLToFaviconUsage(url, favicon, favicons); | |
| 172 | |
| 173 continue; | |
| 174 } | |
| 175 | |
| 176 // Bookmarks in sub-folder are encapsulated with <DL> tag. | |
| 177 if (StartsWithASCII(line, "<DL>", false)) { | |
| 178 has_subfolder = true; | |
| 179 if (!last_folder.empty()) { | |
| 180 path.push_back(last_folder); | |
| 181 last_folder.clear(); | |
| 182 } | |
| 183 if (last_folder_on_toolbar && !toolbar_folder_index) | |
| 184 toolbar_folder_index = path.size(); | |
| 185 | |
| 186 // Mark next folder empty as initial state. | |
| 187 last_folder_is_empty = true; | |
| 188 } else if (StartsWithASCII(line, "</DL>", false)) { | |
| 189 if (path.empty()) | |
| 190 break; // Mismatch <DL>. | |
| 191 | |
| 192 base::string16 folder_title = path.back(); | |
| 193 path.pop_back(); | |
| 194 | |
| 195 if (last_folder_is_empty) { | |
| 196 // Empty folder should be added explicitly. | |
| 197 ImportedBookmarkEntry entry; | |
| 198 entry.is_folder = true; | |
| 199 entry.creation_time = last_folder_add_date; | |
| 200 entry.title = folder_title; | |
| 201 if (toolbar_folder_index) { | |
| 202 // The toolbar folder should be at the top level. | |
| 203 // Make sure we don't add the toolbar folder itself if it is empty. | |
| 204 if (toolbar_folder_index <= path.size()) { | |
| 205 entry.in_toolbar = true; | |
| 206 entry.path.assign(path.begin() + toolbar_folder_index - 1, | |
| 207 path.end()); | |
| 208 bookmarks->push_back(entry); | |
| 209 } | |
| 210 } else { | |
| 211 // Add this folder to the list of |bookmarks|. | |
| 212 entry.path.assign(path.begin(), path.end()); | |
| 213 bookmarks->push_back(entry); | |
| 214 } | |
| 215 | |
| 216 // Parent folder include current one, so it's not empty. | |
| 217 last_folder_is_empty = false; | |
| 218 } | |
| 219 | |
| 220 if (toolbar_folder_index > path.size()) | |
| 221 toolbar_folder_index = 0; | |
| 222 } | |
| 223 } | |
| 224 } | |
| 225 | |
| 226 namespace internal { | |
| 227 | |
| 228 bool ParseCharsetFromLine(const std::string& line, std::string* charset) { | |
| 229 const char kCharset[] = "charset="; | |
| 230 if (StartsWithASCII(line, "<META", false) && | |
| 231 (line.find("CONTENT=\"") != std::string::npos || | |
| 232 line.find("content=\"") != std::string::npos)) { | |
| 233 size_t begin = line.find(kCharset); | |
| 234 if (begin == std::string::npos) | |
| 235 return false; | |
| 236 begin += std::string(kCharset).size(); | |
| 237 size_t end = line.find_first_of('\"', begin); | |
| 238 *charset = line.substr(begin, end - begin); | |
| 239 return true; | |
| 240 } | |
| 241 return false; | |
| 242 } | |
| 243 | |
| 244 bool ParseFolderNameFromLine(const std::string& line, | |
| 245 const std::string& charset, | |
| 246 base::string16* folder_name, | |
| 247 bool* is_toolbar_folder, | |
| 248 base::Time* add_date) { | |
| 249 const char kFolderOpen[] = "<DT><H3"; | |
| 250 const char kFolderClose[] = "</H3>"; | |
| 251 const char kToolbarFolderAttribute[] = "PERSONAL_TOOLBAR_FOLDER"; | |
| 252 const char kAddDateAttribute[] = "ADD_DATE"; | |
| 253 | |
| 254 if (!StartsWithASCII(line, kFolderOpen, true)) | |
| 255 return false; | |
| 256 | |
| 257 size_t end = line.find(kFolderClose); | |
| 258 size_t tag_end = line.rfind('>', end) + 1; | |
| 259 // If no end tag or start tag is broken, we skip to find the folder name. | |
| 260 if (end == std::string::npos || tag_end < arraysize(kFolderOpen)) | |
| 261 return false; | |
| 262 | |
| 263 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), | |
| 264 base::OnStringConversionError::SKIP, folder_name); | |
| 265 *folder_name = net::UnescapeForHTML(*folder_name); | |
| 266 | |
| 267 std::string attribute_list = line.substr(arraysize(kFolderOpen), | |
| 268 tag_end - arraysize(kFolderOpen) - 1); | |
| 269 std::string value; | |
| 270 | |
| 271 // Add date | |
| 272 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { | |
| 273 int64 time; | |
| 274 base::StringToInt64(value, &time); | |
| 275 // Upper bound it at 32 bits. | |
| 276 if (0 < time && time < (1LL << 32)) | |
| 277 *add_date = base::Time::FromTimeT(time); | |
| 278 } | |
| 279 | |
| 280 if (GetAttribute(attribute_list, kToolbarFolderAttribute, &value) && | |
| 281 LowerCaseEqualsASCII(value, "true")) | |
| 282 *is_toolbar_folder = true; | |
| 283 else | |
| 284 *is_toolbar_folder = false; | |
| 285 | |
| 286 return true; | |
| 287 } | |
| 288 | |
| 289 bool ParseBookmarkFromLine(const std::string& line, | |
| 290 const std::string& charset, | |
| 291 base::string16* title, | |
| 292 GURL* url, | |
| 293 GURL* favicon, | |
| 294 base::string16* shortcut, | |
| 295 base::Time* add_date, | |
| 296 base::string16* post_data) { | |
| 297 const char kItemOpen[] = "<DT><A"; | |
| 298 const char kItemClose[] = "</A>"; | |
| 299 const char kFeedURLAttribute[] = "FEEDURL"; | |
| 300 const char kHrefAttribute[] = "HREF"; | |
| 301 const char kIconAttribute[] = "ICON"; | |
| 302 const char kShortcutURLAttribute[] = "SHORTCUTURL"; | |
| 303 const char kAddDateAttribute[] = "ADD_DATE"; | |
| 304 const char kPostDataAttribute[] = "POST_DATA"; | |
| 305 | |
| 306 title->clear(); | |
| 307 *url = GURL(); | |
| 308 *favicon = GURL(); | |
| 309 shortcut->clear(); | |
| 310 post_data->clear(); | |
| 311 *add_date = base::Time(); | |
| 312 | |
| 313 if (!StartsWithASCII(line, kItemOpen, true)) | |
| 314 return false; | |
| 315 | |
| 316 size_t end = line.find(kItemClose); | |
| 317 size_t tag_end = line.rfind('>', end) + 1; | |
| 318 if (end == std::string::npos || tag_end < arraysize(kItemOpen)) | |
| 319 return false; // No end tag or start tag is broken. | |
| 320 | |
| 321 std::string attribute_list = line.substr(arraysize(kItemOpen), | |
| 322 tag_end - arraysize(kItemOpen) - 1); | |
| 323 | |
| 324 // We don't import Live Bookmark folders, which is Firefox's RSS reading | |
| 325 // feature, since the user never necessarily bookmarked them and we don't | |
| 326 // have this feature to update their contents. | |
| 327 std::string value; | |
| 328 if (GetAttribute(attribute_list, kFeedURLAttribute, &value)) | |
| 329 return false; | |
| 330 | |
| 331 // Title | |
| 332 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), | |
| 333 base::OnStringConversionError::SKIP, title); | |
| 334 *title = net::UnescapeForHTML(*title); | |
| 335 | |
| 336 // URL | |
| 337 if (GetAttribute(attribute_list, kHrefAttribute, &value)) { | |
| 338 base::string16 url16; | |
| 339 base::CodepageToUTF16(value, charset.c_str(), | |
| 340 base::OnStringConversionError::SKIP, &url16); | |
| 341 url16 = net::UnescapeForHTML(url16); | |
| 342 | |
| 343 *url = GURL(url16); | |
| 344 } | |
| 345 | |
| 346 // Favicon | |
| 347 if (GetAttribute(attribute_list, kIconAttribute, &value)) | |
| 348 *favicon = GURL(value); | |
| 349 | |
| 350 // Keyword | |
| 351 if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) { | |
| 352 base::CodepageToUTF16(value, charset.c_str(), | |
| 353 base::OnStringConversionError::SKIP, shortcut); | |
| 354 *shortcut = net::UnescapeForHTML(*shortcut); | |
| 355 } | |
| 356 | |
| 357 // Add date | |
| 358 if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { | |
| 359 int64 time; | |
| 360 base::StringToInt64(value, &time); | |
| 361 // Upper bound it at 32 bits. | |
| 362 if (0 < time && time < (1LL << 32)) | |
| 363 *add_date = base::Time::FromTimeT(time); | |
| 364 } | |
| 365 | |
| 366 // Post data. | |
| 367 if (GetAttribute(attribute_list, kPostDataAttribute, &value)) { | |
| 368 base::CodepageToUTF16(value, charset.c_str(), | |
| 369 base::OnStringConversionError::SKIP, post_data); | |
| 370 *post_data = net::UnescapeForHTML(*post_data); | |
| 371 } | |
| 372 | |
| 373 return true; | |
| 374 } | |
| 375 | |
| 376 bool ParseMinimumBookmarkFromLine(const std::string& line, | |
| 377 const std::string& charset, | |
| 378 base::string16* title, | |
| 379 GURL* url) { | |
| 380 const char kItemOpen[] = "<DT><A"; | |
| 381 const char kItemClose[] = "</"; | |
| 382 const char kHrefAttributeUpper[] = "HREF"; | |
| 383 const char kHrefAttributeLower[] = "href"; | |
| 384 | |
| 385 title->clear(); | |
| 386 *url = GURL(); | |
| 387 | |
| 388 // Case-insensitive check of open tag. | |
| 389 if (!StartsWithASCII(line, kItemOpen, false)) | |
| 390 return false; | |
| 391 | |
| 392 // Find any close tag. | |
| 393 size_t end = line.find(kItemClose); | |
| 394 size_t tag_end = line.rfind('>', end) + 1; | |
| 395 if (end == std::string::npos || tag_end < arraysize(kItemOpen)) | |
| 396 return false; // No end tag or start tag is broken. | |
| 397 | |
| 398 std::string attribute_list = line.substr(arraysize(kItemOpen), | |
| 399 tag_end - arraysize(kItemOpen) - 1); | |
| 400 | |
| 401 // Title | |
| 402 base::CodepageToUTF16(line.substr(tag_end, end - tag_end), charset.c_str(), | |
| 403 base::OnStringConversionError::SKIP, title); | |
| 404 *title = net::UnescapeForHTML(*title); | |
| 405 | |
| 406 // URL | |
| 407 std::string value; | |
| 408 if (GetAttribute(attribute_list, kHrefAttributeUpper, &value) || | |
| 409 GetAttribute(attribute_list, kHrefAttributeLower, &value)) { | |
| 410 if (charset.length() != 0) { | |
| 411 base::string16 url16; | |
| 412 base::CodepageToUTF16(value, charset.c_str(), | |
| 413 base::OnStringConversionError::SKIP, &url16); | |
| 414 url16 = net::UnescapeForHTML(url16); | |
| 415 | |
| 416 *url = GURL(url16); | |
| 417 } else { | |
| 418 *url = GURL(value); | |
| 419 } | |
| 420 } | |
| 421 | |
| 422 return true; | |
| 423 } | |
| 424 | |
| 425 } // namespace internal | |
| 426 | |
| 427 } // namespace bookmark_html_reader | |
| OLD | NEW |