Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(652)

Side by Side Diff: chrome/utility/importer/bookmark_html_reader.cc

Issue 616763002: Importing certain bookmarks from firefox and HTML file as search engines. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/utility/importer/bookmark_html_reader.h" 5 #include "chrome/utility/importer/bookmark_html_reader.h"
6 6
7 #include "base/callback.h" 7 #include "base/callback.h"
8 #include "base/files/file_util.h" 8 #include "base/files/file_util.h"
9 #include "base/i18n/icu_string_conversions.h" 9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/strings/string_number_conversions.h" 10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_split.h" 11 #include "base/strings/string_split.h"
12 #include "base/strings/string_util.h" 12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
13 #include "base/time/time.h" 14 #include "base/time/time.h"
14 #include "chrome/common/importer/imported_bookmark_entry.h" 15 #include "chrome/common/importer/imported_bookmark_entry.h"
15 #include "chrome/common/importer/imported_favicon_usage.h" 16 #include "chrome/common/importer/imported_favicon_usage.h"
16 #include "chrome/utility/importer/favicon_reencode.h" 17 #include "chrome/utility/importer/favicon_reencode.h"
17 #include "net/base/data_url.h" 18 #include "net/base/data_url.h"
18 #include "net/base/escape.h" 19 #include "net/base/escape.h"
19 #include "url/gurl.h" 20 #include "url/gurl.h"
20 #include "url/url_constants.h" 21 #include "url/url_constants.h"
22 #include "url/url_util.h"
21 23
22 namespace { 24 namespace {
23 25
24 // Fetches the given |attribute| value from the |attribute_list|. Returns true 26 // Fetches the given |attribute| value from the |attribute_list|. Returns true
25 // if successful, and |value| will contain the value. 27 // if successful, and |value| will contain the value.
26 bool GetAttribute(const std::string& attribute_list, 28 bool GetAttribute(const std::string& attribute_list,
27 const std::string& attribute, 29 const std::string& attribute,
28 std::string* value) { 30 std::string* value) {
29 const char kQuote[] = "\""; 31 const char kQuote[] = "\"";
30 32
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
84 86
85 } // namespace 87 } // namespace
86 88
87 namespace bookmark_html_reader { 89 namespace bookmark_html_reader {
88 90
89 void ImportBookmarksFile( 91 void ImportBookmarksFile(
90 const base::Callback<bool(void)>& cancellation_callback, 92 const base::Callback<bool(void)>& cancellation_callback,
91 const base::Callback<bool(const GURL&)>& valid_url_callback, 93 const base::Callback<bool(const GURL&)>& valid_url_callback,
92 const base::FilePath& file_path, 94 const base::FilePath& file_path,
93 std::vector<ImportedBookmarkEntry>* bookmarks, 95 std::vector<ImportedBookmarkEntry>* bookmarks,
96 std::vector<importer::URLKeywordInfo>* url_keywords,
94 std::vector<ImportedFaviconUsage>* favicons) { 97 std::vector<ImportedFaviconUsage>* favicons) {
95 std::string content; 98 std::string content;
96 base::ReadFileToString(file_path, &content); 99 base::ReadFileToString(file_path, &content);
97 std::vector<std::string> lines; 100 std::vector<std::string> lines;
98 base::SplitString(content, '\n', &lines); 101 base::SplitString(content, '\n', &lines);
99 102
100 base::string16 last_folder; 103 base::string16 last_folder;
101 bool last_folder_on_toolbar = false; 104 bool last_folder_on_toolbar = false;
102 bool last_folder_is_empty = true; 105 bool last_folder_is_empty = true;
103 bool has_subfolder = false; 106 bool has_subfolder = false;
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
146 // keywords yet. 149 // keywords yet.
147 is_bookmark = 150 is_bookmark =
148 internal::ParseBookmarkFromLine(line, charset, &title, 151 internal::ParseBookmarkFromLine(line, charset, &title,
149 &url, &favicon, &shortcut, 152 &url, &favicon, &shortcut,
150 &add_date, &post_data) || 153 &add_date, &post_data) ||
151 internal::ParseMinimumBookmarkFromLine(line, charset, &title, &url); 154 internal::ParseMinimumBookmarkFromLine(line, charset, &title, &url);
152 155
153 if (is_bookmark) 156 if (is_bookmark)
154 last_folder_is_empty = false; 157 last_folder_is_empty = false;
155 158
159 base::string16 decoded_url;
160
156 if (is_bookmark && 161 if (is_bookmark &&
157 post_data.empty() && 162 post_data.empty() &&
158 (valid_url_callback.is_null() || valid_url_callback.Run(url))) { 163 (valid_url_callback.is_null() || valid_url_callback.Run(url)
164 || CanImportURLAsSearchEngine(url, &decoded_url))) {
Ilya Sherman 2014/09/30 20:54:12 nit: The "||" should be on the previous line. ("g
Tapu Ghose 2014/10/07 02:02:44 Done.
159 if (toolbar_folder_index > path.size() && !path.empty()) { 165 if (toolbar_folder_index > path.size() && !path.empty()) {
160 NOTREACHED(); // error in parsing. 166 NOTREACHED(); // error in parsing.
161 break; 167 break;
162 } 168 }
163 169
170 // If bookmark contains a keyword then import it as search engine.
171 // If |url| is invalid, set raw_url property of |url_keyword_info|
172 // which will be used in importing as search engine.
173 if (!shortcut.empty()) {
Peter Kasting 2014/10/01 01:02:55 Doesn't this mean we'll also import non-replaceabl
Tapu Ghose 2014/10/07 02:02:44 Agree with you. Updated logic to address your comm
174 importer::URLKeywordInfo url_keyword_info;
175 if (url.is_valid())
176 url_keyword_info.url = url;
177 else
178 url_keyword_info.raw_url = decoded_url;
179 url_keyword_info.keyword.assign(shortcut);
180 url_keyword_info.display_name = title;
181 url_keywords->push_back(url_keyword_info);
182 continue;
183 }
184
164 ImportedBookmarkEntry entry; 185 ImportedBookmarkEntry entry;
165 entry.creation_time = add_date; 186 entry.creation_time = add_date;
166 entry.url = url; 187 entry.url = url;
167 entry.title = title; 188 entry.title = title;
168 189
169 if (toolbar_folder_index) { 190 if (toolbar_folder_index) {
170 // The toolbar folder should be at the top level. 191 // The toolbar folder should be at the top level.
171 entry.in_toolbar = true; 192 entry.in_toolbar = true;
172 entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end()); 193 entry.path.assign(path.begin() + toolbar_folder_index - 1, path.end());
173 } else { 194 } else {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
231 // Parent folder include current one, so it's not empty. 252 // Parent folder include current one, so it's not empty.
232 last_folder_is_empty = false; 253 last_folder_is_empty = false;
233 } 254 }
234 255
235 if (toolbar_folder_index > path.size()) 256 if (toolbar_folder_index > path.size())
236 toolbar_folder_index = 0; 257 toolbar_folder_index = 0;
237 } 258 }
238 } 259 }
239 } 260 }
240 261
262 bool CanImportURLAsSearchEngine(const GURL& url, base::string16* decoded_url) {
263 // If |url| is valid then we are not supposed to be here.
Peter Kasting 2014/10/01 01:02:55 That implies callers shouldn't be calling us. I d
Tapu Ghose 2014/10/07 02:02:44 Updated comments.
264 if (url.is_valid())
265 return false;
266
267 std::string raw_url = url.possibly_invalid_spec();
268
269 // Decode raw_url.
270 url::RawCanonOutputW<1024> canon_output;
Ilya Sherman 2014/09/30 20:54:12 Hmm, how do you know that 1024 characters is enoug
Peter Kasting 2014/10/01 01:02:55 It's not necessarily enough. This is almost certa
Tapu Ghose 2014/10/07 02:02:44 Acknowledged.
Tapu Ghose 2014/10/07 02:02:44 Acknowledged.
271 url::DecodeURLEscapeSequences(raw_url.c_str(), raw_url.size(), &canon_output);
Peter Kasting 2014/10/01 01:02:55 Why can't we use the higher-level unescaping funct
Tapu Ghose 2014/10/07 02:02:44 My bad. I could not find higher-level unescaping f
272 decoded_url->assign(base::string16(canon_output.data(),
273 canon_output.length()));
274
275 raw_url = base::UTF16ToUTF8(*decoded_url);
276 const std::string kReplacementTerm("%s");
277
278 if (raw_url.find(kReplacementTerm) == std::string::npos)
279 return false;
280
281 // Substitute replacement term with arbitrary value. Return false if the
282 // resulted output is an invalid url.
283 size_t n = 0;
284 const std::string kReplacementValue("val");
285 while ((n = raw_url.find(kReplacementTerm, n)) != std::string::npos) {
286 raw_url.replace(n, kReplacementTerm.size(), kReplacementValue);
287 n += kReplacementValue.size();
288 }
Ilya Sherman 2014/09/30 20:54:12 Please use a utility function from base/strings/st
Peter Kasting 2014/10/01 01:02:55 Furthermore, there's already code in Chrome that a
Tapu Ghose 2014/10/07 02:02:44 Before writing my custom code I tried to use Repla
Tapu Ghose 2014/10/07 02:02:44 Replaced my own version with ReplaceSubstringsAfte
Ilya Sherman 2014/10/07 22:08:22 That sounds like a dependency issue, caused by the
Tapu Ghose 2014/10/12 00:58:19 Tried after adding '../components/search_engines.g
Ilya Sherman 2014/10/14 00:41:49 Hmm, based on local testing, it looks like you nee
Tapu Ghose 2014/10/19 02:46:33 After modifying .gyp* and BUILD.gn do I need to ru
289 return GURL(raw_url).is_valid();
290 }
291
241 namespace internal { 292 namespace internal {
242 293
243 bool ParseCharsetFromLine(const std::string& line, std::string* charset) { 294 bool ParseCharsetFromLine(const std::string& line, std::string* charset) {
244 const char kCharset[] = "charset="; 295 const char kCharset[] = "charset=";
245 if (StartsWithASCII(line, "<META", false) && 296 if (StartsWithASCII(line, "<META", false) &&
246 (line.find("CONTENT=\"") != std::string::npos || 297 (line.find("CONTENT=\"") != std::string::npos ||
247 line.find("content=\"") != std::string::npos)) { 298 line.find("content=\"") != std::string::npos)) {
248 size_t begin = line.find(kCharset); 299 size_t begin = line.find(kCharset);
249 if (begin == std::string::npos) 300 if (begin == std::string::npos)
250 return false; 301 return false;
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after
433 *url = GURL(value); 484 *url = GURL(value);
434 } 485 }
435 } 486 }
436 487
437 return true; 488 return true;
438 } 489 }
439 490
440 } // namespace internal 491 } // namespace internal
441 492
442 } // namespace bookmark_html_reader 493 } // namespace bookmark_html_reader
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698