Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(527)

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 10837244: Replace HistoryQuickProvider protobuf-based caching with an SQLite-based database. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Tweak suppression. Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/url_index_private_data.h" 5 #include "chrome/browser/history/url_index_private_data.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 #include <vector> 12 #include <vector>
13 13
14 #include "base/basictypes.h" 14 #include "base/basictypes.h"
15 #include "base/file_util.h" 15 #include "base/file_util.h"
16 #include "base/i18n/case_conversion.h" 16 #include "base/i18n/case_conversion.h"
17 #include "base/metrics/histogram.h"
18 #include "base/string_util.h" 17 #include "base/string_util.h"
19 #include "base/time.h"
20 #include "base/utf_string_conversions.h" 18 #include "base/utf_string_conversions.h"
21 #include "chrome/browser/autocomplete/autocomplete_provider.h" 19 #include "chrome/browser/autocomplete/autocomplete_provider.h"
22 #include "chrome/browser/autocomplete/url_prefix.h"
23 #include "chrome/browser/history/history_database.h" 20 #include "chrome/browser/history/history_database.h"
24 #include "chrome/browser/history/in_memory_url_index.h" 21 #include "chrome/browser/history/in_memory_url_cache_database.h"
22 #include "chrome/common/chrome_constants.h"
23 #include "chrome/common/url_constants.h"
25 #include "content/public/browser/browser_thread.h" 24 #include "content/public/browser/browser_thread.h"
26 #include "content/public/browser/notification_details.h" 25 #include "content/public/browser/notification_details.h"
27 #include "content/public/browser/notification_service.h" 26 #include "content/public/browser/notification_service.h"
28 #include "content/public/browser/notification_source.h" 27 #include "content/public/browser/notification_source.h"
29 #include "net/base/net_util.h" 28 #include "net/base/net_util.h"
30 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" 29
31 30 using content::BrowserThread;
32 using google::protobuf::RepeatedField;
33 using google::protobuf::RepeatedPtrField;
34 using in_memory_url_index::InMemoryURLIndexCacheItem;
35 31
36 namespace history { 32 namespace history {
37 33
38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; 34 // Comparison function for sorting search terms by descending length.
39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; 35 bool LengthGreater(const string16& string_a, const string16& string_b) {
40 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; 36 return string_a.length() > string_b.length();
41 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem; 37 }
42 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry 38
43 CharWordMapEntry; 39 // Initializes a whitelist of URL schemes.
44 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem 40 void InitializeSchemeWhitelist(std::set<std::string>* whitelist) {
45 WordIDHistoryMapItem; 41 DCHECK(whitelist);
46 typedef imui:: 42 if (!whitelist->empty())
47 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry 43 return; // Nothing to do, already initialized.
48 WordIDHistoryMapEntry; 44 whitelist->insert(std::string(chrome::kAboutScheme));
49 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; 45 whitelist->insert(std::string(chrome::kChromeUIScheme));
50 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry 46 whitelist->insert(std::string(chrome::kFileScheme));
51 HistoryInfoMapEntry; 47 whitelist->insert(std::string(chrome::kFtpScheme));
52 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem; 48 whitelist->insert(std::string(chrome::kHttpScheme));
53 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry 49 whitelist->insert(std::string(chrome::kHttpsScheme));
54 WordStartsMapEntry; 50 whitelist->insert(std::string(chrome::kMailToScheme));
51 }
52
53 // CacheTransaction ------------------------------------------------------------
54
55 // Simple automatic helper class encapsulating calls to BeginTransaction/
56 // CommitTransaction.
57 class CacheTransaction {
58 public:
59 explicit CacheTransaction(InMemoryURLCacheDatabase* db);
60 ~CacheTransaction();
61
62 private:
63 InMemoryURLCacheDatabase* db_;
64
65 DISALLOW_COPY_AND_ASSIGN(CacheTransaction);
66 };
67
68 CacheTransaction::CacheTransaction(InMemoryURLCacheDatabase* db)
69 : db_(db) {
70 if (db_)
71 db_->BeginTransaction();
72 }
73
74 CacheTransaction::~CacheTransaction() {
75 if (db_)
76 db_->CommitTransaction();
77 }
78
79 // URLIndexPrivateData Public (but only to InMemoryURLIndex) Functions ---------
80
81 URLIndexPrivateData::URLIndexPrivateData(const FilePath& history_dir,
82 const std::string& languages)
83 : history_dir_(history_dir),
84 languages_(languages),
85 cache_enabled_(true),
86 shutdown_(false),
87 lock_(),
88 pre_filter_item_count_(0),
89 post_filter_item_count_(0),
90 post_scoring_item_count_(0) {
91 InitializeSchemeWhitelist(&scheme_whitelist_);
92 }
93
94 bool URLIndexPrivateData::Init(
95 base::SequencedWorkerPool::SequenceToken sequence_token) {
96 // Since init occurs on the DB thread, it is possible that the profile was
97 // told to shutdown before this task had a chance to kick off.
98 base::AutoLock lock(lock_);
99 if (shutdown_)
100 return false;
101 cache_db_ = new InMemoryURLCacheDatabase();
102 FilePath dir_path;
103 set_cache_enabled(GetCacheDBPath(&dir_path) &&
104 cache_db_->Init(dir_path, sequence_token));
105 return cache_enabled();
106 }
107
108 void URLIndexPrivateData::Reset() {
109 Clear();
110 if (cache_enabled())
111 cache_db_->Reset();
112 }
113
114 bool URLIndexPrivateData::Empty() const {
115 return history_info_map_.empty();
116 }
117
118 URLIndexPrivateData* URLIndexPrivateData::Snapshot() const {
119 return new URLIndexPrivateData(*this);
120 }
121
122 void URLIndexPrivateData::Shutdown() {
123 base::AutoLock lock(lock_);
124 shutdown_ = true;
125 if (cache_enabled())
126 cache_db_->Shutdown();
127 }
128
129 // Helper predicates for ValidateConsistency.
130 template <typename Pair>
131 struct SelectFirst : std::unary_function<Pair, typename Pair::first_type> {
132 const typename Pair::first_type& operator()(const Pair& x) const {
133 return x.first;
134 }
135 };
136
137 template <typename Pair>
138 struct SelectSecond : std::unary_function<Pair, typename Pair::second_type> {
139 const typename Pair::second_type& operator()(const Pair& x) const {
140 return x.second;
141 }
142 };
143
144 template <typename SourcePair, typename Target>
145 struct TargetInserter : std::unary_function<SourcePair, void> {
146 explicit TargetInserter(Target* target) : target_(target) {}
147
148 void operator()(const SourcePair& source) {
149 target_->insert(source.second.begin(), source.second.end());
150 }
151
152 Target* target_;
153 };
154
155 bool URLIndexPrivateData::ValidateConsistency() const {
156 // Scope things so that a large data set doesn't unnecessarily accumulate and
157 // hang around.
158 {
159 // Make a set of WordIDs from word_map_.
160 WordIDSet word_id_set_a;
161 std::transform(word_map_.begin(), word_map_.end(),
162 std::inserter(word_id_set_a, word_id_set_a.begin()),
163 SelectSecond<WordMap::value_type>());
164
165 // Compare word_map_'s word set to the words from word_id_history_map_.
166 {
167 WordIDSet word_id_set_b;
168 std::transform(word_id_history_map_.begin(), word_id_history_map_.end(),
169 std::inserter(word_id_set_b, word_id_set_b.begin()),
170 SelectFirst<WordIDHistoryMap::value_type>());
171 WordIDSet leftovers;
172 std::set_symmetric_difference(
173 word_id_set_a.begin(), word_id_set_a.end(),
174 word_id_set_b.begin(), word_id_set_b.end(),
175 std::inserter(leftovers, leftovers.begin()));
176 if (!leftovers.empty())
177 return false;
178 }
179
180 // Compare word_map_'s word set to the words from history_id_word_map_.
181 {
182 WordIDSet word_id_set_b;
183 std::for_each(history_id_word_map_.begin(), history_id_word_map_.end(),
184 TargetInserter<HistoryIDWordMap::value_type,
185 WordIDSet>(&word_id_set_b));
186 WordIDSet leftovers;
187 std::set_symmetric_difference(
188 word_id_set_a.begin(), word_id_set_a.end(),
189 word_id_set_b.begin(), word_id_set_b.end(),
190 std::inserter(leftovers, leftovers.begin()));
191 if (!leftovers.empty())
192 return false;
193 }
194
195 // Compare word_map_'s word set to the words from char_word_map_.
196 {
197 WordIDSet word_id_set_b;
198 std::for_each(char_word_map_.begin(), char_word_map_.end(),
199 TargetInserter<CharWordIDMap::value_type,
200 WordIDSet>(&word_id_set_b));
201 WordIDSet leftovers;
202 std::set_symmetric_difference(
203 word_id_set_a.begin(), word_id_set_a.end(),
204 word_id_set_b.begin(), word_id_set_b.end(),
205 std::inserter(leftovers, leftovers.begin()));
206 if (!leftovers.empty())
207 return false;
208 }
209
210 // Intersect available_words_ with set of WordIDs (created above from
211 // word_map_) and test for no common WordIDs.
212 {
213 WordIDSet leftovers;
214 std::set_intersection(word_id_set_a.begin(), word_id_set_a.end(),
215 available_words_.begin(), available_words_.end(),
216 std::inserter(leftovers, leftovers.begin()));
217 if (!leftovers.empty())
218 return false;
219 }
220 }
221
222 {
223 // Make a set of HistoryIDs from history_info_map_.
224 HistoryIDSet history_id_set_a;
225 std::transform(history_info_map_.begin(), history_info_map_.end(),
226 std::inserter(history_id_set_a, history_id_set_a.begin()),
227 SelectFirst<HistoryInfoMap::value_type>());
228
229 // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.
230 {
231 HistoryIDSet history_id_set_b;
232 std::transform(history_info_map_.begin(), history_info_map_.end(),
233 std::inserter(history_id_set_b, history_id_set_b.begin()),
234 SelectFirst<HistoryInfoMap::value_type>());
235 HistoryIDSet leftovers;
236 std::set_symmetric_difference(
237 history_id_set_a.begin(), history_id_set_a.end(),
238 history_id_set_b.begin(), history_id_set_b.end(),
239 std::inserter(leftovers, leftovers.begin()));
240 if (!leftovers.empty())
241 return false;
242 }
243
244 // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.
245 {
246 HistoryIDSet history_id_set_b;
247 std::for_each(word_id_history_map_.begin(), word_id_history_map_.end(),
248 TargetInserter<WordIDHistoryMap::value_type,
249 HistoryIDSet>(&history_id_set_b));
250 HistoryIDSet leftovers;
251 std::set_symmetric_difference(
252 history_id_set_a.begin(), history_id_set_a.end(),
253 history_id_set_b.begin(), history_id_set_b.end(),
254 std::inserter(leftovers, leftovers.begin()));
255 if (!leftovers.empty())
256 return false;
257 }
258
259 // Compare history_info_map_'s set to word_starts_map_'s HistoryIDs.
260 {
261 HistoryIDSet history_id_set_b;
262 std::transform(word_starts_map_.begin(), word_starts_map_.end(),
263 std::inserter(history_id_set_b, history_id_set_b.begin()),
264 SelectFirst<WordStartsMap::value_type>());
265 HistoryIDSet leftovers;
266 std::set_symmetric_difference(
267 history_id_set_a.begin(), history_id_set_a.end(),
268 history_id_set_b.begin(), history_id_set_b.end(),
269 std::inserter(leftovers, leftovers.begin()));
270 if (!leftovers.empty())
271 return false;
272 }
273 }
274
275 // Make sets of words from word_list_ and word_map_ and test for equality.
276 {
277 String16Set word_list_set;
278 std::copy(word_list_.begin(), word_list_.end(),
279 std::inserter(word_list_set, word_list_set.end()));
280 String16Set word_map_set;
281 std::transform(word_map_.begin(), word_map_.end(),
282 std::inserter(word_map_set, word_map_set.begin()),
283 SelectFirst<WordMap::value_type>());
284 if (word_list_set != word_map_set)
285 return false;
286 }
287
288 return true;
289 }
290
291 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
292 const string16& search_string) {
293 pre_filter_item_count_ = 0;
294 post_filter_item_count_ = 0;
295 post_scoring_item_count_ = 0;
296 // The search string we receive may contain escaped characters. For reducing
297 // the index we need individual, lower-cased words, ignoring escapings. For
298 // the final filtering we need whitespace separated substrings possibly
299 // containing escaped characters.
300 string16 lower_raw_string(base::i18n::ToLower(search_string));
301 string16 lower_unescaped_string =
302 net::UnescapeURLComponent(lower_raw_string,
303 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
304 // Extract individual 'words' (as opposed to 'terms'; see below) from the
305 // search string. When the user types "colspec=ID%20Mstone Release" we get
306 // four 'words': "colspec", "id", "mstone" and "release".
307 String16Vector lower_words(
308 history::String16VectorFromString16(lower_unescaped_string, false, NULL));
309 ScoredHistoryMatches scored_items;
310
311 // Do nothing if we have indexed no words (probably because we've not been
312 // initialized yet) or the search string has no words.
313 if (word_list_.empty() || lower_words.empty()) {
314 search_term_cache_.clear(); // Invalidate the term cache.
315 return scored_items;
316 }
317
318 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
319 // approach.
320 ResetSearchTermCache();
321
322 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
323
324 // Trim the candidate pool if it is large. Note that we do not filter out
325 // items that do not contain the search terms as proper substrings -- doing
326 // so is the performance-costly operation we are trying to avoid in order
327 // to maintain omnibox responsiveness.
328 const size_t kItemsToScoreLimit = 500;
329 pre_filter_item_count_ = history_id_set.size();
330 // If we trim the results set we do not want to cache the results for next
331 // time as the user's ultimately desired result could easily be eliminated
332 // in this early rough filter.
333 bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
334 if (was_trimmed) {
335 HistoryIDVector history_ids;
336 std::copy(history_id_set.begin(), history_id_set.end(),
337 std::back_inserter(history_ids));
338 // Trim down the set by sorting by typed-count, visit-count, and last
339 // visit.
340 HistoryItemFactorGreater
341 item_factor_functor(history_info_map_);
342 std::partial_sort(history_ids.begin(),
343 history_ids.begin() + kItemsToScoreLimit,
344 history_ids.end(),
345 item_factor_functor);
346 history_id_set.clear();
347 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
348 std::inserter(history_id_set, history_id_set.end()));
349 post_filter_item_count_ = history_id_set.size();
350 }
351
352 // Pass over all of the candidates filtering out any without a proper
353 // substring match, inserting those which pass in order by score. Note that
354 // in this step we are using the raw search string complete with escaped
355 // URL elements. When the user has specifically typed something akin to
356 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that
357 // specific substring appears in the URL or page title.
358
359 // We call these 'terms' (as opposed to 'words'; see above) as in this case
360 // we only want to break up the search string on 'true' whitespace rather than
361 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we
362 // get two 'terms': "colspec=id%20mstone" and "release".
363 history::String16Vector lower_raw_terms;
364 Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);
365 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
366 AddHistoryMatch(*this, lower_raw_string,
367 lower_raw_terms, base::Time::Now())).ScoredMatches();
368
369 // Select and sort only the top kMaxMatches results.
370 if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
371 std::partial_sort(scored_items.begin(),
372 scored_items.begin() +
373 AutocompleteProvider::kMaxMatches,
374 scored_items.end(),
375 ScoredHistoryMatch::MatchScoreGreater);
376 scored_items.resize(AutocompleteProvider::kMaxMatches);
377 } else {
378 std::sort(scored_items.begin(), scored_items.end(),
379 ScoredHistoryMatch::MatchScoreGreater);
380 }
381 post_scoring_item_count_ = scored_items.size();
382
383 if (was_trimmed) {
384 search_term_cache_.clear(); // Invalidate the term cache.
385 } else {
386 // Remove any stale SearchTermCacheItems.
387 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
388 cache_iter != search_term_cache_.end(); ) {
389 if (!cache_iter->second.used_)
390 search_term_cache_.erase(cache_iter++);
391 else
392 ++cache_iter;
393 }
394 }
395
396 return scored_items;
397 }
398
399 bool URLIndexPrivateData::UpdateURL(const URLRow& row) {
400 // The row may or may not already be in our index. If it is not already
401 // indexed and it qualifies then it gets indexed. If it is already
402 // indexed and still qualifies then it gets updated, otherwise it
403 // is deleted from the index.
404 bool row_was_updated = false;
405 URLID row_id = row.id();
406 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
407 if (row_pos == history_info_map_.end()) {
408 // This new row should be indexed if it qualifies.
409 URLRow new_row(row);
410 new_row.set_id(row_id);
411 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
412 IndexRow(new_row);
413 } else if (RowQualifiesAsSignificant(row, base::Time())) {
414 // This indexed row still qualifies and will be re-indexed.
415 // The url won't have changed but the title, visit count, etc.
416 // might have changed.
417 URLRow& row_to_update = row_pos->second;
418 bool title_updated = row_to_update.title() != row.title();
419 if (row_to_update.visit_count() != row.visit_count() ||
420 row_to_update.typed_count() != row.typed_count() ||
421 row_to_update.last_visit() != row.last_visit() || title_updated) {
422 CacheTransaction cache_transaction(cache_enabled() ?
423 cache_db_.get() : NULL);
424 row_to_update.set_visit_count(row.visit_count());
425 row_to_update.set_typed_count(row.typed_count());
426 row_to_update.set_last_visit(row.last_visit());
427 row_to_update.set_hidden(row.hidden());
428 // While the URL is guaranteed to remain stable, the title may have
429 // changed. If so, then update the index with the changed words.
430 if (title_updated) {
431 // Clear all words associated with this row and re-index both the
432 // URL and title.
433 RemoveRowWordsFromIndex(row_to_update);
434 row_to_update.set_title(row.title());
435 RowWordStarts word_starts;
436 AddRowWordsToIndex(row_to_update, &word_starts);
437 word_starts_map_[row_id] = word_starts;
438 // Replace all word_starts for the row.
439 if (cache_enabled()) {
440 cache_db_->RemoveWordStarts(row_id);
441 cache_db_->AddRowWordStarts(row_id, word_starts);
442 }
443 }
444 if (cache_enabled()) {
445 cache_db_->RemoveHistoryIDFromURLs(row_id);
446 cache_db_->AddHistoryToURLs(row_id, row);
447 }
448 row_was_updated = true;
449 }
450 } else {
451 // This indexed row no longer qualifies and will be de-indexed by
452 // clearing all words associated with this row.
453 RemoveRowFromIndex(row);
454 row_was_updated = true;
455 }
456 if (row_was_updated)
457 search_term_cache_.clear(); // This invalidates the cache.
458 return row_was_updated;
459 }
460
461 // Helper functor for DeleteURL.
462 class HistoryInfoMapItemHasURL {
463 public:
464 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}
465
466 bool operator()(const std::pair<const HistoryID, URLRow>& item) {
467 return item.second.url() == url_;
468 }
469
470 private:
471 const GURL& url_;
472 };
473
474 bool URLIndexPrivateData::DeleteURL(const GURL& url) {
475 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||
476 BrowserThread::CurrentlyOn(BrowserThread::UI));
477 // Find the matching entry in the history_info_map_.
478 HistoryInfoMap::iterator pos = std::find_if(
479 history_info_map_.begin(),
480 history_info_map_.end(),
481 HistoryInfoMapItemHasURL(url));
482 if (pos == history_info_map_.end())
483 return false;
484 RemoveRowFromIndex(pos->second);
485 search_term_cache_.clear(); // This invalidates the cache.
486 return true;
487 }
488
489 bool URLIndexPrivateData::RestoreFromCacheTask() {
490 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||
491 !BrowserThread::CurrentlyOn(BrowserThread::UI));
492 DCHECK(cache_db_);
493 if (IsShutdown())
494 return false;
495 Clear();
496 if (RestoreFromCache(cache_db_))
497 return true;
498 // If there was no SQLite-based cache then there might be an old,
499 // deprecated, protobuf-based cache file laying around. Get rid of it.
500 DeleteOldVersionCacheFile();
501 return false;
502 }
503
504 // static
505 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
506 HistoryDatabase* history_db,
507 scoped_refptr<URLIndexPrivateData> old_data) {
508 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||
509 !BrowserThread::CurrentlyOn(BrowserThread::UI));
510 if (!history_db)
511 return NULL;
512
513 scoped_refptr<URLIndexPrivateData> rebuilt_data(
514 new URLIndexPrivateData(*(old_data.get())));
515
516 // NOTE: We disable the cache database until after the replacement private
517 // data has been created and then we re-enable the database. Once the private
518 // data has been swapped in the database is refreshed with the new data.
519 URLDatabase::URLEnumerator history_enum;
520 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
521 return NULL;
522
523 for (URLRow row; !old_data->IsShutdown() && history_enum.GetNextURL(&row); )
524 rebuilt_data->IndexRow(row);
525
526 if (old_data->IsShutdown()) {
527 rebuilt_data.release();
528 }
529
530 return rebuilt_data;
531 }
532
533 void URLIndexPrivateData::RefreshCacheTask() {
534 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||
535 !BrowserThread::CurrentlyOn(BrowserThread::UI));
536 base::AutoLock lock(lock_);
537 if (shutdown_ || !cache_enabled())
538 return;
539 // Should a refresh fail for any reason we consider the database to be
540 // corrupt and unavailable; no further remedial action is possible.
541 set_cache_enabled(cache_db_->Reset() && cache_db_->Refresh(*this));
542 }
543
544 // static
545 void URLIndexPrivateData::InitializeSchemeWhitelistForTesting(
546 std::set<std::string>* whitelist) {
547 InitializeSchemeWhitelist(whitelist);
548 }
55 549
56 // SearchTermCacheItem --------------------------------------------------------- 550 // SearchTermCacheItem ---------------------------------------------------------
57 551
58 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem( 552 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(
59 const WordIDSet& word_id_set, 553 const WordIDSet& word_id_set,
60 const HistoryIDSet& history_id_set) 554 const HistoryIDSet& history_id_set)
61 : word_id_set_(word_id_set), 555 : word_id_set_(word_id_set),
62 history_id_set_(history_id_set), 556 history_id_set_(history_id_set),
63 used_(true) {} 557 used_(true) {}
64 558
65 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() 559 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()
66 : used_(true) {} 560 : used_(true) {}
67 561
68 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {} 562 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}
69 563
70 // Algorithm Functions --------------------------------------------------------- 564 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------
71 565
72 // Comparison function for sorting search terms by descending length. 566 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(
73 bool LengthGreater(const string16& string_a, const string16& string_b) { 567 const URLIndexPrivateData& private_data,
74 return string_a.length() > string_b.length(); 568 const string16& lower_string,
569 const String16Vector& lower_terms,
570 const base::Time now)
571 : private_data_(private_data),
572 lower_string_(lower_string),
573 lower_terms_(lower_terms),
574 now_(now) {}
575
576 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}
577
578 void URLIndexPrivateData::AddHistoryMatch::operator()(
579 const HistoryID history_id) {
580 HistoryInfoMap::const_iterator hist_pos =
581 private_data_.history_info_map_.find(history_id);
582 if (hist_pos != private_data_.history_info_map_.end()) {
583 const URLRow& hist_item = hist_pos->second;
584 WordStartsMap::const_iterator starts_pos =
585 private_data_.word_starts_map_.find(history_id);
586 DCHECK(starts_pos != private_data_.word_starts_map_.end());
587 ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,
588 starts_pos->second, now_);
589 if (match.raw_score > 0)
590 scored_matches_.push_back(match);
591 }
75 } 592 }
76 593
77 // InMemoryURLIndex's Private Data --------------------------------------------- 594 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
78 595
79 URLIndexPrivateData::URLIndexPrivateData() 596 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
80 : restored_cache_version_(0), 597 const HistoryInfoMap& history_info_map)
81 saved_cache_version_(kCurrentCacheFileVersion), 598 : history_info_map_(history_info_map) {
599 }
600
601 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}
602
603 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(
604 const HistoryID h1,
605 const HistoryID h2) {
606 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));
607 if (entry1 == history_info_map_.end())
608 return false;
609 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));
610 if (entry2 == history_info_map_.end())
611 return true;
612 const URLRow& r1(entry1->second);
613 const URLRow& r2(entry2->second);
614 // First cut: typed count, visit count, recency.
615 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
616 // recently visited (within the last 12/24 hours) as highly important. Get
617 // input from mpearson.
618 if (r1.typed_count() != r2.typed_count())
619 return (r1.typed_count() > r2.typed_count());
620 if (r1.visit_count() != r2.visit_count())
621 return (r1.visit_count() > r2.visit_count());
622 return (r1.last_visit() > r2.last_visit());
623 }
624
625 // URLIndexPrivateData Private Functions ---------------------------------------
626
627 URLIndexPrivateData::URLIndexPrivateData(const URLIndexPrivateData& old_data)
628 : history_dir_(old_data.history_dir_),
629 languages_(old_data.languages_),
630 cache_enabled_(old_data.cache_enabled_),
631 shutdown_(old_data.IsShutdown()),
632 lock_(),
633 scheme_whitelist_(old_data.scheme_whitelist_),
82 pre_filter_item_count_(0), 634 pre_filter_item_count_(0),
83 post_filter_item_count_(0), 635 post_filter_item_count_(0),
84 post_scoring_item_count_(0) { 636 post_scoring_item_count_(0) {
637 cache_db_ = old_data.cache_db_;
638 }
639
640 // Called only by unit tests.
641 URLIndexPrivateData::URLIndexPrivateData()
642 : cache_enabled_(true),
643 shutdown_(false),
644 lock_(),
645 pre_filter_item_count_(0),
646 post_filter_item_count_(0),
647 post_scoring_item_count_(0) {
648 InitializeSchemeWhitelist(&scheme_whitelist_);
85 } 649 }
86 650
87 URLIndexPrivateData::~URLIndexPrivateData() {} 651 URLIndexPrivateData::~URLIndexPrivateData() {}
88 652
653 bool URLIndexPrivateData::IsShutdown() const {
654 base::AutoLock lock(lock_);
655 return shutdown_;
sky 2012/08/22 19:42:34 nit: spacing.
656 }
657
89 void URLIndexPrivateData::Clear() { 658 void URLIndexPrivateData::Clear() {
90 word_list_.clear(); 659 word_list_.clear();
91 available_words_.clear(); 660 available_words_.clear();
92 word_map_.clear(); 661 word_map_.clear();
93 char_word_map_.clear(); 662 char_word_map_.clear();
94 word_id_history_map_.clear(); 663 word_id_history_map_.clear();
95 history_id_word_map_.clear(); 664 history_id_word_map_.clear();
96 history_info_map_.clear(); 665 history_info_map_.clear();
97 word_starts_map_.clear(); 666 word_starts_map_.clear();
98 } 667 }
99 668
100 bool URLIndexPrivateData::Empty() const {
101 return history_info_map_.empty();
102 }
103
104 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {
105 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;
106 data_copy->word_list_ = word_list_;
107 data_copy->available_words_ = available_words_;
108 data_copy->word_map_ = word_map_;
109 data_copy->char_word_map_ = char_word_map_;
110 data_copy->word_id_history_map_ = word_id_history_map_;
111 data_copy->history_id_word_map_ = history_id_word_map_;
112 data_copy->history_info_map_ = history_info_map_;
113 data_copy->word_starts_map_ = word_starts_map_;
114 return data_copy;
115 // Not copied:
116 // search_term_cache_
117 // pre_filter_item_count_
118 // post_filter_item_count_
119 // post_scoring_item_count_
120 };
121
122 // Cache Updating -------------------------------------------------------------- 669 // Cache Updating --------------------------------------------------------------
123 670
124 bool URLIndexPrivateData::IndexRow( 671 bool URLIndexPrivateData::IndexRow(const URLRow& row) {
125 const URLRow& row,
126 const std::string& languages,
127 const std::set<std::string>& scheme_whitelist) {
128 const GURL& gurl(row.url()); 672 const GURL& gurl(row.url());
129 673
130 // Index only URLs with a whitelisted scheme. 674 // Index only URLs with a whitelisted scheme.
131 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist)) 675 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist_))
132 return false; 676 return false;
133 677
134 URLID row_id = row.id();
135 // Strip out username and password before saving and indexing. 678 // Strip out username and password before saving and indexing.
136 string16 url(net::FormatUrl(gurl, languages, 679 string16 url(net::FormatUrl(gurl, languages_,
137 net::kFormatUrlOmitUsernamePassword, 680 net::kFormatUrlOmitUsernamePassword,
138 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 681 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
139 NULL, NULL, NULL)); 682 NULL, NULL, NULL));
140 683
684 URLID row_id = row.id();
141 HistoryID history_id = static_cast<HistoryID>(row_id); 685 HistoryID history_id = static_cast<HistoryID>(row_id);
686 DCHECK(history_info_map_.find(history_id) == history_info_map_.end());
142 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); 687 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
143 688
144 // Add the row for quick lookup in the history info store. 689 // Add the row for quick lookup in the history info store.
145 URLRow new_row(GURL(url), row_id); 690 URLRow new_row(GURL(url), row_id);
146 new_row.set_visit_count(row.visit_count()); 691 new_row.set_visit_count(row.visit_count());
147 new_row.set_typed_count(row.typed_count()); 692 new_row.set_typed_count(row.typed_count());
148 new_row.set_last_visit(row.last_visit()); 693 new_row.set_last_visit(row.last_visit());
149 new_row.set_title(row.title()); 694 new_row.set_title(row.title());
695
696 CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);
150 history_info_map_[history_id] = new_row; 697 history_info_map_[history_id] = new_row;
698 if (cache_enabled())
699 cache_db_->AddHistoryToURLs(history_id, row);
151 700
152 // Index the words contained in the URL and title of the row. 701 // Index the words contained in the URL and title of the row.
153 RowWordStarts word_starts; 702 RowWordStarts word_starts;
154 AddRowWordsToIndex(new_row, &word_starts, languages); 703 AddRowWordsToIndex(new_row, &word_starts);
155 word_starts_map_[history_id] = word_starts; 704 word_starts_map_[history_id] = word_starts;
705 if (cache_enabled())
706 cache_db_->AddRowWordStarts(history_id, word_starts);
156 return true; 707 return true;
157 } 708 }
158 709
159 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, 710 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,
160 RowWordStarts* word_starts, 711 RowWordStarts* word_starts) {
161 const std::string& languages) {
162 HistoryID history_id = static_cast<HistoryID>(row.id()); 712 HistoryID history_id = static_cast<HistoryID>(row.id());
163 // Split URL into individual, unique words then add in the title words. 713 // Split URL into individual, unique words then add in the title words.
164 const GURL& gurl(row.url()); 714 const GURL& gurl(row.url());
165 string16 url(net::FormatUrl(gurl, languages, 715 string16 url(net::FormatUrl(gurl, languages_,
166 net::kFormatUrlOmitUsernamePassword, 716 net::kFormatUrlOmitUsernamePassword,
167 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 717 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
168 NULL, NULL, NULL)); 718 NULL, NULL, NULL));
169 url = base::i18n::ToLower(url); 719 url = base::i18n::ToLower(url);
170 String16Set url_words = String16SetFromString16(url, 720 String16Set url_words = String16SetFromString16(url,
171 word_starts ? &word_starts->url_word_starts_ : NULL); 721 word_starts ? &word_starts->url_word_starts_ : NULL);
172 String16Set title_words = String16SetFromString16(row.title(), 722 String16Set title_words = String16SetFromString16(row.title(),
173 word_starts ? &word_starts->title_word_starts_ : NULL); 723 word_starts ? &word_starts->title_word_starts_ : NULL);
174 String16Set words; 724 String16Set words;
175 std::set_union(url_words.begin(), url_words.end(), 725 std::set_union(url_words.begin(), url_words.end(),
(...skipping 12 matching lines...) Expand all
188 if (word_pos != word_map_.end()) 738 if (word_pos != word_map_.end())
189 UpdateWordHistory(word_pos->second, history_id); 739 UpdateWordHistory(word_pos->second, history_id);
190 else 740 else
191 AddWordHistory(term, history_id); 741 AddWordHistory(term, history_id);
192 } 742 }
193 743
194 void URLIndexPrivateData::UpdateWordHistory(WordID word_id, 744 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,
195 HistoryID history_id) { 745 HistoryID history_id) {
196 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); 746 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
197 DCHECK(history_pos != word_id_history_map_.end()); 747 DCHECK(history_pos != word_id_history_map_.end());
748 // There is no need to record changes to the word_id_history_map_ in the cache
749 // because it can be rebuilt from the history_id_word_map_'s table.
198 HistoryIDSet& history_id_set(history_pos->second); 750 HistoryIDSet& history_id_set(history_pos->second);
199 history_id_set.insert(history_id); 751 history_id_set.insert(history_id);
200 AddToHistoryIDWordMap(history_id, word_id); 752 AddToHistoryIDWordMap(history_id, word_id);
753
754 // Add word_id/history_id entry to the word_history table
755 if (cache_enabled())
756 cache_db_->AddHistoryToWordHistory(word_id, history_id);
201 } 757 }
202 758
203 void URLIndexPrivateData::AddWordHistory(const string16& term, 759 void URLIndexPrivateData::AddWordHistory(const string16& term,
204 HistoryID history_id) { 760 HistoryID history_id) {
205 WordID word_id = word_list_.size(); 761 WordID word_id = word_list_.size();
206 if (available_words_.empty()) { 762 if (available_words_.empty()) {
207 word_list_.push_back(term); 763 word_list_.push_back(term);
208 } else { 764 } else {
209 word_id = *(available_words_.begin()); 765 word_id = *(available_words_.begin());
210 word_list_[word_id] = term; 766 word_list_[word_id] = term;
211 available_words_.erase(word_id); 767 available_words_.erase(word_id);
212 } 768 }
213 word_map_[term] = word_id; 769 word_map_[term] = word_id;
214 770
771 // Add word_id/term entry to the words table;
772 if (cache_enabled())
773 cache_db_->AddWordToWords(word_id, term);
774
215 HistoryIDSet history_id_set; 775 HistoryIDSet history_id_set;
216 history_id_set.insert(history_id); 776 history_id_set.insert(history_id);
217 word_id_history_map_[word_id] = history_id_set; 777 word_id_history_map_[word_id] = history_id_set;
218 AddToHistoryIDWordMap(history_id, word_id); 778 AddToHistoryIDWordMap(history_id, word_id);
219 779
780 // Add word_id/history_id entry to the word_history table
781 if (cache_enabled())
782 cache_db_->AddHistoryToWordHistory(word_id, history_id);
783
220 // For each character in the newly added word (i.e. a word that is not 784 // For each character in the newly added word (i.e. a word that is not
221 // already in the word index), add the word to the character index. 785 // already in the word index), add the word to the character index.
222 Char16Set characters = Char16SetFromString16(term); 786 Char16Set characters = Char16SetFromString16(term);
223 for (Char16Set::iterator uni_char_iter = characters.begin(); 787 for (Char16Set::iterator uni_char_iter = characters.begin();
224 uni_char_iter != characters.end(); ++uni_char_iter) { 788 uni_char_iter != characters.end(); ++uni_char_iter) {
225 char16 uni_char = *uni_char_iter; 789 char16 uni_char = *uni_char_iter;
226 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); 790 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
227 if (char_iter != char_word_map_.end()) { 791 if (char_iter != char_word_map_.end()) {
228 // Update existing entry in the char/word index. 792 // Update existing entry in the char/word index.
229 WordIDSet& word_id_set(char_iter->second); 793 WordIDSet& word_id_set(char_iter->second);
230 word_id_set.insert(word_id); 794 word_id_set.insert(word_id);
231 } else { 795 } else {
232 // Create a new entry in the char/word index. 796 // Create a new entry in the char/word index.
233 WordIDSet word_id_set; 797 WordIDSet word_id_set;
234 word_id_set.insert(word_id); 798 word_id_set.insert(word_id);
235 char_word_map_[uni_char] = word_id_set; 799 char_word_map_[uni_char] = word_id_set;
236 } 800 }
801 // Add uni_char/word_id entry to the char_words database table.
802 if (cache_enabled())
803 cache_db_->AddWordToCharWords(uni_char, word_id);
237 } 804 }
238 } 805 }
239 806
240 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) { 807 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {
808 CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);
241 RemoveRowWordsFromIndex(row); 809 RemoveRowWordsFromIndex(row);
242 HistoryID history_id = static_cast<HistoryID>(row.id()); 810 HistoryID history_id = static_cast<HistoryID>(row.id());
243 history_info_map_.erase(history_id); 811 history_info_map_.erase(history_id);
244 word_starts_map_.erase(history_id); 812 word_starts_map_.erase(history_id);
813 if (cache_enabled())
814 cache_db_->RemoveHistoryIDFromURLs(history_id);
245 } 815 }
246 816
247 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) { 817 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {
248 // Remove the entries in history_id_word_map_ and word_id_history_map_ for 818 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
249 // this row. 819 // this row.
250 HistoryID history_id = static_cast<HistoryID>(row.id()); 820 HistoryID history_id = static_cast<HistoryID>(row.id());
251 WordIDSet word_id_set = history_id_word_map_[history_id]; 821 WordIDSet word_id_set = history_id_word_map_[history_id];
252 history_id_word_map_.erase(history_id); 822 history_id_word_map_.erase(history_id);
253 823
254 // Reconcile any changes to word usage. 824 // Reconcile any changes to word usage.
(...skipping 13 matching lines...) Expand all
268 char_word_map_[uni_char].erase(word_id); 838 char_word_map_[uni_char].erase(word_id);
269 if (char_word_map_[uni_char].empty()) 839 if (char_word_map_[uni_char].empty())
270 char_word_map_.erase(uni_char); // No longer in use. 840 char_word_map_.erase(uni_char); // No longer in use.
271 } 841 }
272 842
273 // Complete the removal of references to the word. 843 // Complete the removal of references to the word.
274 word_id_history_map_.erase(word_id); 844 word_id_history_map_.erase(word_id);
275 word_map_.erase(word); 845 word_map_.erase(word);
276 word_list_[word_id] = string16(); 846 word_list_[word_id] = string16();
277 available_words_.insert(word_id); 847 available_words_.insert(word_id);
848 if (cache_enabled())
849 cache_db_->RemoveWordFromWords(word_id);
278 } 850 }
851 if (cache_enabled())
852 cache_db_->RemoveHistoryIDFromWordHistory(history_id);
279 } 853 }
280 854
281 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, 855 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
282 WordID word_id) { 856 WordID word_id) {
283 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); 857 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
284 if (iter != history_id_word_map_.end()) { 858 if (iter != history_id_word_map_.end()) {
285 WordIDSet& word_id_set(iter->second); 859 WordIDSet& word_id_set(iter->second);
286 word_id_set.insert(word_id); 860 word_id_set.insert(word_id);
287 } else { 861 } else {
288 WordIDSet word_id_set; 862 WordIDSet word_id_set;
289 word_id_set.insert(word_id); 863 word_id_set.insert(word_id);
290 history_id_word_map_[history_id] = word_id_set; 864 history_id_word_map_[history_id] = word_id_set;
291 } 865 }
292 } 866 }
293 867
294 bool URLIndexPrivateData::UpdateURL(
295 const URLRow& row,
296 const std::string& languages,
297 const std::set<std::string>& scheme_whitelist) {
298 // The row may or may not already be in our index. If it is not already
299 // indexed and it qualifies then it gets indexed. If it is already
300 // indexed and still qualifies then it gets updated, otherwise it
301 // is deleted from the index.
302 bool row_was_updated = false;
303 URLID row_id = row.id();
304 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
305 if (row_pos == history_info_map_.end()) {
306 // This new row should be indexed if it qualifies.
307 URLRow new_row(row);
308 new_row.set_id(row_id);
309 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
310 IndexRow(new_row, languages, scheme_whitelist);
311 } else if (RowQualifiesAsSignificant(row, base::Time())) {
312 // This indexed row still qualifies and will be re-indexed.
313 // The url won't have changed but the title, visit count, etc.
314 // might have changed.
315 URLRow& row_to_update = row_pos->second;
316 bool title_updated = row_to_update.title() != row.title();
317 if (row_to_update.visit_count() != row.visit_count() ||
318 row_to_update.typed_count() != row.typed_count() ||
319 row_to_update.last_visit() != row.last_visit() || title_updated) {
320 row_to_update.set_visit_count(row.visit_count());
321 row_to_update.set_typed_count(row.typed_count());
322 row_to_update.set_last_visit(row.last_visit());
323 // While the URL is guaranteed to remain stable, the title may have
324 // changed. If so, then update the index with the changed words.
325 if (title_updated) {
326 // Clear all words associated with this row and re-index both the
327 // URL and title.
328 RemoveRowWordsFromIndex(row_to_update);
329 row_to_update.set_title(row.title());
330 RowWordStarts word_starts;
331 AddRowWordsToIndex(row_to_update, &word_starts, languages);
332 word_starts_map_[row_id] = word_starts;
333 }
334 row_was_updated = true;
335 }
336 } else {
337 // This indexed row no longer qualifies and will be de-indexed by
338 // clearing all words associated with this row.
339 RemoveRowFromIndex(row);
340 row_was_updated = true;
341 }
342 if (row_was_updated)
343 search_term_cache_.clear(); // This invalidates the cache.
344 return row_was_updated;
345 }
346
347 // Helper functor for DeleteURL.
348 class HistoryInfoMapItemHasURL {
349 public:
350 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}
351
352 bool operator()(const std::pair<const HistoryID, URLRow>& item) {
353 return item.second.url() == url_;
354 }
355
356 private:
357 const GURL& url_;
358 };
359
360 bool URLIndexPrivateData::DeleteURL(const GURL& url) {
361 // Find the matching entry in the history_info_map_.
362 HistoryInfoMap::iterator pos = std::find_if(
363 history_info_map_.begin(),
364 history_info_map_.end(),
365 HistoryInfoMapItemHasURL(url));
366 if (pos == history_info_map_.end())
367 return false;
368 RemoveRowFromIndex(pos->second);
369 search_term_cache_.clear(); // This invalidates the cache.
370 return true;
371 }
372
373 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
374
375 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
376 const HistoryInfoMap& history_info_map)
377 : history_info_map_(history_info_map) {
378 }
379
380 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}
381
382 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(
383 const HistoryID h1,
384 const HistoryID h2) {
385 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));
386 if (entry1 == history_info_map_.end())
387 return false;
388 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));
389 if (entry2 == history_info_map_.end())
390 return true;
391 const URLRow& r1(entry1->second);
392 const URLRow& r2(entry2->second);
393 // First cut: typed count, visit count, recency.
394 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
395 // recently visited (within the last 12/24 hours) as highly important. Get
396 // input from mpearson.
397 if (r1.typed_count() != r2.typed_count())
398 return (r1.typed_count() > r2.typed_count());
399 if (r1.visit_count() != r2.visit_count())
400 return (r1.visit_count() > r2.visit_count());
401 return (r1.last_visit() > r2.last_visit());
402 }
403
404 // Cache Searching -------------------------------------------------------------
405
406 // NOTE: This is the main public search function.
407 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
408 const string16& search_string) {
409 pre_filter_item_count_ = 0;
410 post_filter_item_count_ = 0;
411 post_scoring_item_count_ = 0;
412 // The search string we receive may contain escaped characters. For reducing
413 // the index we need individual, lower-cased words, ignoring escapings. For
414 // the final filtering we need whitespace separated substrings possibly
415 // containing escaped characters.
416 string16 lower_raw_string(base::i18n::ToLower(search_string));
417 string16 lower_unescaped_string =
418 net::UnescapeURLComponent(lower_raw_string,
419 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);
420 // Extract individual 'words' (as opposed to 'terms'; see below) from the
421 // search string. When the user types "colspec=ID%20Mstone Release" we get
422 // four 'words': "colspec", "id", "mstone" and "release".
423 String16Vector lower_words(
424 history::String16VectorFromString16(lower_unescaped_string, false, NULL));
425 ScoredHistoryMatches scored_items;
426
427 // Do nothing if we have indexed no words (probably because we've not been
428 // initialized yet) or the search string has no words.
429 if (word_list_.empty() || lower_words.empty()) {
430 search_term_cache_.clear(); // Invalidate the term cache.
431 return scored_items;
432 }
433
434 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
435 // approach.
436 ResetSearchTermCache();
437
438 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
439
440 // Trim the candidate pool if it is large. Note that we do not filter out
441 // items that do not contain the search terms as proper substrings -- doing
442 // so is the performance-costly operation we are trying to avoid in order
443 // to maintain omnibox responsiveness.
444 const size_t kItemsToScoreLimit = 500;
445 pre_filter_item_count_ = history_id_set.size();
446 // If we trim the results set we do not want to cache the results for next
447 // time as the user's ultimately desired result could easily be eliminated
448 // in this early rough filter.
449 bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
450 if (was_trimmed) {
451 HistoryIDVector history_ids;
452 std::copy(history_id_set.begin(), history_id_set.end(),
453 std::back_inserter(history_ids));
454 // Trim down the set by sorting by typed-count, visit-count, and last
455 // visit.
456 HistoryItemFactorGreater
457 item_factor_functor(history_info_map_);
458 std::partial_sort(history_ids.begin(),
459 history_ids.begin() + kItemsToScoreLimit,
460 history_ids.end(),
461 item_factor_functor);
462 history_id_set.clear();
463 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
464 std::inserter(history_id_set, history_id_set.end()));
465 post_filter_item_count_ = history_id_set.size();
466 }
467
468 // Pass over all of the candidates filtering out any without a proper
469 // substring match, inserting those which pass in order by score. Note that
470 // in this step we are using the raw search string complete with escaped
471 // URL elements. When the user has specifically typed something akin to
472 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that
473 // specific substring appears in the URL or page title.
474
475 // We call these 'terms' (as opposed to 'words'; see above) as in this case
476 // we only want to break up the search string on 'true' whitespace rather than
477 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we
478 // get two 'terms': "colspec=id%20mstone" and "release".
479 history::String16Vector lower_raw_terms;
480 Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);
481 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
482 AddHistoryMatch(*this, lower_raw_string,
483 lower_raw_terms, base::Time::Now())).ScoredMatches();
484
485 // Select and sort only the top kMaxMatches results.
486 if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
487 std::partial_sort(scored_items.begin(),
488 scored_items.begin() +
489 AutocompleteProvider::kMaxMatches,
490 scored_items.end(),
491 ScoredHistoryMatch::MatchScoreGreater);
492 scored_items.resize(AutocompleteProvider::kMaxMatches);
493 } else {
494 std::sort(scored_items.begin(), scored_items.end(),
495 ScoredHistoryMatch::MatchScoreGreater);
496 }
497 post_scoring_item_count_ = scored_items.size();
498
499 if (was_trimmed) {
500 search_term_cache_.clear(); // Invalidate the term cache.
501 } else {
502 // Remove any stale SearchTermCacheItems.
503 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
504 cache_iter != search_term_cache_.end(); ) {
505 if (!cache_iter->second.used_)
506 search_term_cache_.erase(cache_iter++);
507 else
508 ++cache_iter;
509 }
510 }
511
512 return scored_items;
513 }
514
515 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------
516
517 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(
518 const URLIndexPrivateData& private_data,
519 const string16& lower_string,
520 const String16Vector& lower_terms,
521 const base::Time now)
522 : private_data_(private_data),
523 lower_string_(lower_string),
524 lower_terms_(lower_terms),
525 now_(now) {}
526
527 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}
528
529 void URLIndexPrivateData::AddHistoryMatch::operator()(
530 const HistoryID history_id) {
531 HistoryInfoMap::const_iterator hist_pos =
532 private_data_.history_info_map_.find(history_id);
533 if (hist_pos != private_data_.history_info_map_.end()) {
534 const URLRow& hist_item = hist_pos->second;
535 WordStartsMap::const_iterator starts_pos =
536 private_data_.word_starts_map_.find(history_id);
537 DCHECK(starts_pos != private_data_.word_starts_map_.end());
538 ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,
539 starts_pos->second, now_);
540 if (match.raw_score > 0)
541 scored_matches_.push_back(match);
542 }
543 }
544
545 void URLIndexPrivateData::ResetSearchTermCache() { 868 void URLIndexPrivateData::ResetSearchTermCache() {
546 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 869 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();
547 iter != search_term_cache_.end(); ++iter) 870 iter != search_term_cache_.end(); ++iter)
548 iter->second.used_ = false; 871 iter->second.used_ = false;
549 } 872 }
550 873
551 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( 874 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(
552 const String16Vector& unsorted_words) { 875 const String16Vector& unsorted_words) {
553 // Break the terms down into individual terms (words), get the candidate 876 // Break the terms down into individual terms (words), get the candidate
554 // set for each term, and intersect each to get a final candidate list. 877 // set for each term, and intersect each to get a final candidate list.
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
723 std::set_intersection(word_id_set.begin(), word_id_set.end(), 1046 std::set_intersection(word_id_set.begin(), word_id_set.end(),
724 char_word_id_set.begin(), char_word_id_set.end(), 1047 char_word_id_set.begin(), char_word_id_set.end(),
725 std::inserter(new_word_id_set, 1048 std::inserter(new_word_id_set,
726 new_word_id_set.begin())); 1049 new_word_id_set.begin()));
727 word_id_set.swap(new_word_id_set); 1050 word_id_set.swap(new_word_id_set);
728 } 1051 }
729 } 1052 }
730 return word_id_set; 1053 return word_id_set;
731 } 1054 }
732 1055
733 // Cache Saving ---------------------------------------------------------------- 1056 bool URLIndexPrivateData::RestoreFromCache(InMemoryURLCacheDatabase* cache_db) {
734 1057 DCHECK(cache_db);
735 // static 1058 if (!cache_db->RestorePrivateData(this)) {
736 void URLIndexPrivateData::WritePrivateDataToCacheFileTask( 1059 cache_db->Reset();
737 scoped_refptr<URLIndexPrivateData> private_data, 1060 return false;
738 const FilePath& file_path, 1061 }
739 scoped_refptr<RefCountedBool> succeeded) { 1062 return !Empty() && ValidateConsistency();
740 DCHECK(private_data.get());
741 DCHECK(!file_path.empty());
742 succeeded->set_value(private_data->SaveToFile(file_path));
743 } 1063 }
744 1064
745 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { 1065 void URLIndexPrivateData::DeleteOldVersionCacheFile() const {
746 base::TimeTicks beginning_time = base::TimeTicks::Now(); 1066 if (history_dir_.empty())
747 InMemoryURLIndexCacheItem index_cache; 1067 return;
748 SavePrivateData(&index_cache); 1068 FilePath path = history_dir_.Append(chrome::kHQPCacheFilename);
749 std::string data; 1069 file_util::Delete(path, false);
750 if (!index_cache.SerializeToString(&data)) { 1070 }
751 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; 1071
1072 bool URLIndexPrivateData::GetCacheDBPath(FilePath* file_path) {
1073 DCHECK(file_path);
1074 if (history_dir_.empty())
752 return false; 1075 return false;
753 } 1076 *file_path = history_dir_.Append(chrome::kHQPCacheDBFilename);
754
755 int size = data.size();
756 if (file_util::WriteFile(file_path, data.c_str(), size) != size) {
757 LOG(WARNING) << "Failed to write " << file_path.value();
758 return false;
759 }
760 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
761 base::TimeTicks::Now() - beginning_time);
762 return true; 1077 return true;
763 } 1078 }
764 1079
765 void URLIndexPrivateData::SavePrivateData( 1080 void URLIndexPrivateData::SetCacheDatabaseForTesting(
766 InMemoryURLIndexCacheItem* cache) const { 1081 InMemoryURLCacheDatabase* test_db) {
767 DCHECK(cache); 1082 cache_db_ = test_db;
768 cache->set_timestamp(base::Time::Now().ToInternalValue());
769 cache->set_version(saved_cache_version_);
770 // history_item_count_ is no longer used but rather than change the protobuf
771 // definition use a placeholder. This will go away with the switch to SQLite.
772 cache->set_history_item_count(0);
773 SaveWordList(cache);
774 SaveWordMap(cache);
775 SaveCharWordMap(cache);
776 SaveWordIDHistoryMap(cache);
777 SaveHistoryInfoMap(cache);
778 SaveWordStartsMap(cache);
779 }
780
781 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
782 if (word_list_.empty())
783 return;
784 WordListItem* list_item = cache->mutable_word_list();
785 list_item->set_word_count(word_list_.size());
786 for (String16Vector::const_iterator iter = word_list_.begin();
787 iter != word_list_.end(); ++iter)
788 list_item->add_word(UTF16ToUTF8(*iter));
789 }
790
791 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
792 if (word_map_.empty())
793 return;
794 WordMapItem* map_item = cache->mutable_word_map();
795 map_item->set_item_count(word_map_.size());
796 for (WordMap::const_iterator iter = word_map_.begin();
797 iter != word_map_.end(); ++iter) {
798 WordMapEntry* map_entry = map_item->add_word_map_entry();
799 map_entry->set_word(UTF16ToUTF8(iter->first));
800 map_entry->set_word_id(iter->second);
801 }
802 }
803
804 void URLIndexPrivateData::SaveCharWordMap(
805 InMemoryURLIndexCacheItem* cache) const {
806 if (char_word_map_.empty())
807 return;
808 CharWordMapItem* map_item = cache->mutable_char_word_map();
809 map_item->set_item_count(char_word_map_.size());
810 for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
811 iter != char_word_map_.end(); ++iter) {
812 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
813 map_entry->set_char_16(iter->first);
814 const WordIDSet& word_id_set(iter->second);
815 map_entry->set_item_count(word_id_set.size());
816 for (WordIDSet::const_iterator set_iter = word_id_set.begin();
817 set_iter != word_id_set.end(); ++set_iter)
818 map_entry->add_word_id(*set_iter);
819 }
820 }
821
822 void URLIndexPrivateData::SaveWordIDHistoryMap(
823 InMemoryURLIndexCacheItem* cache) const {
824 if (word_id_history_map_.empty())
825 return;
826 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
827 map_item->set_item_count(word_id_history_map_.size());
828 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
829 iter != word_id_history_map_.end(); ++iter) {
830 WordIDHistoryMapEntry* map_entry =
831 map_item->add_word_id_history_map_entry();
832 map_entry->set_word_id(iter->first);
833 const HistoryIDSet& history_id_set(iter->second);
834 map_entry->set_item_count(history_id_set.size());
835 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
836 set_iter != history_id_set.end(); ++set_iter)
837 map_entry->add_history_id(*set_iter);
838 }
839 }
840
841 void URLIndexPrivateData::SaveHistoryInfoMap(
842 InMemoryURLIndexCacheItem* cache) const {
843 if (history_info_map_.empty())
844 return;
845 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
846 map_item->set_item_count(history_info_map_.size());
847 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
848 iter != history_info_map_.end(); ++iter) {
849 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
850 map_entry->set_history_id(iter->first);
851 const URLRow& url_row(iter->second);
852 // Note: We only save information that contributes to the index so there
853 // is no need to save search_term_cache_ (not persistent).
854 map_entry->set_visit_count(url_row.visit_count());
855 map_entry->set_typed_count(url_row.typed_count());
856 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
857 map_entry->set_url(url_row.url().spec());
858 map_entry->set_title(UTF16ToUTF8(url_row.title()));
859 }
860 }
861
862 void URLIndexPrivateData::SaveWordStartsMap(
863 InMemoryURLIndexCacheItem* cache) const {
864 if (word_starts_map_.empty())
865 return;
866 // For unit testing: Enable saving of the cache as an earlier version to
867 // allow testing of cache file upgrading in ReadFromFile().
868 // TODO(mrossetti): Instead of intruding on production code with this kind of
869 // test harness, save a copy of an older version cache with known results.
870 // Implement this when switching the caching over to SQLite.
871 if (saved_cache_version_ < 1)
872 return;
873
874 WordStartsMapItem* map_item = cache->mutable_word_starts_map();
875 map_item->set_item_count(word_starts_map_.size());
876 for (WordStartsMap::const_iterator iter = word_starts_map_.begin();
877 iter != word_starts_map_.end(); ++iter) {
878 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
879 map_entry->set_history_id(iter->first);
880 const RowWordStarts& word_starts(iter->second);
881 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();
882 i != word_starts.url_word_starts_.end(); ++i)
883 map_entry->add_url_word_starts(*i);
884 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();
885 i != word_starts.title_word_starts_.end(); ++i)
886 map_entry->add_title_word_starts(*i);
887 }
888 }
889
890 // Cache Restoring -------------------------------------------------------------
891
892 // static
893 void URLIndexPrivateData::RestoreFromFileTask(
894 const FilePath& file_path,
895 scoped_refptr<URLIndexPrivateData> private_data,
896 const std::string& languages) {
897 private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);
898 }
899
900 // static
901 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(
902 const FilePath& file_path,
903 const std::string& languages) {
904 base::TimeTicks beginning_time = base::TimeTicks::Now();
905 if (!file_util::PathExists(file_path))
906 return NULL;
907 std::string data;
908 // If there is no cache file then simply give up. This will cause us to
909 // attempt to rebuild from the history database.
910 if (!file_util::ReadFileToString(file_path, &data))
911 return NULL;
912
913 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
914 InMemoryURLIndexCacheItem index_cache;
915 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
916 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
917 << file_path.value();
918 return restored_data;
919 }
920
921 if (!restored_data->RestorePrivateData(index_cache, languages))
922 return NULL;
923
924 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
925 base::TimeTicks::Now() - beginning_time);
926 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
927 restored_data->history_id_word_map_.size());
928 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
929 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
930 restored_data->word_map_.size());
931 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
932 restored_data->char_word_map_.size());
933 if (restored_data->Empty())
934 return NULL; // 'No data' is the same as a failed reload.
935 return restored_data;
936 }
937
938 // static
939 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
940 HistoryDatabase* history_db,
941 const std::string& languages,
942 const std::set<std::string>& scheme_whitelist) {
943 if (!history_db)
944 return NULL;
945
946 base::TimeTicks beginning_time = base::TimeTicks::Now();
947
948 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
949 URLDatabase::URLEnumerator history_enum;
950 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
951 return NULL;
952 for (URLRow row; history_enum.GetNextURL(&row); )
953 rebuilt_data->IndexRow(row, languages, scheme_whitelist);
954
955 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
956 base::TimeTicks::Now() - beginning_time);
957 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
958 rebuilt_data->history_id_word_map_.size());
959 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
960 rebuilt_data->word_map_.size());
961 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
962 rebuilt_data->char_word_map_.size());
963 return rebuilt_data;
964 }
965
966 bool URLIndexPrivateData::RestorePrivateData(
967 const InMemoryURLIndexCacheItem& cache,
968 const std::string& languages) {
969 if (cache.has_version())
970 restored_cache_version_ = cache.version();
971 return RestoreWordList(cache) && RestoreWordMap(cache) &&
972 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
973 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);
974 }
975
976 bool URLIndexPrivateData::RestoreWordList(
977 const InMemoryURLIndexCacheItem& cache) {
978 if (!cache.has_word_list())
979 return false;
980 const WordListItem& list_item(cache.word_list());
981 uint32 expected_item_count = list_item.word_count();
982 uint32 actual_item_count = list_item.word_size();
983 if (actual_item_count == 0 || actual_item_count != expected_item_count)
984 return false;
985 const RepeatedPtrField<std::string>& words(list_item.word());
986 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
987 iter != words.end(); ++iter)
988 word_list_.push_back(UTF8ToUTF16(*iter));
989 return true;
990 }
991
992 bool URLIndexPrivateData::RestoreWordMap(
993 const InMemoryURLIndexCacheItem& cache) {
994 if (!cache.has_word_map())
995 return false;
996 const WordMapItem& list_item(cache.word_map());
997 uint32 expected_item_count = list_item.item_count();
998 uint32 actual_item_count = list_item.word_map_entry_size();
999 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1000 return false;
1001 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
1002 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
1003 iter != entries.end(); ++iter)
1004 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
1005 return true;
1006 }
1007
1008 bool URLIndexPrivateData::RestoreCharWordMap(
1009 const InMemoryURLIndexCacheItem& cache) {
1010 if (!cache.has_char_word_map())
1011 return false;
1012 const CharWordMapItem& list_item(cache.char_word_map());
1013 uint32 expected_item_count = list_item.item_count();
1014 uint32 actual_item_count = list_item.char_word_map_entry_size();
1015 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1016 return false;
1017 const RepeatedPtrField<CharWordMapEntry>&
1018 entries(list_item.char_word_map_entry());
1019 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =
1020 entries.begin(); iter != entries.end(); ++iter) {
1021 expected_item_count = iter->item_count();
1022 actual_item_count = iter->word_id_size();
1023 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1024 return false;
1025 char16 uni_char = static_cast<char16>(iter->char_16());
1026 WordIDSet word_id_set;
1027 const RepeatedField<int32>& word_ids(iter->word_id());
1028 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
1029 jiter != word_ids.end(); ++jiter)
1030 word_id_set.insert(*jiter);
1031 char_word_map_[uni_char] = word_id_set;
1032 }
1033 return true;
1034 }
1035
1036 bool URLIndexPrivateData::RestoreWordIDHistoryMap(
1037 const InMemoryURLIndexCacheItem& cache) {
1038 if (!cache.has_word_id_history_map())
1039 return false;
1040 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
1041 uint32 expected_item_count = list_item.item_count();
1042 uint32 actual_item_count = list_item.word_id_history_map_entry_size();
1043 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1044 return false;
1045 const RepeatedPtrField<WordIDHistoryMapEntry>&
1046 entries(list_item.word_id_history_map_entry());
1047 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
1048 entries.begin(); iter != entries.end(); ++iter) {
1049 expected_item_count = iter->item_count();
1050 actual_item_count = iter->history_id_size();
1051 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1052 return false;
1053 WordID word_id = iter->word_id();
1054 HistoryIDSet history_id_set;
1055 const RepeatedField<int64>& history_ids(iter->history_id());
1056 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
1057 jiter != history_ids.end(); ++jiter) {
1058 history_id_set.insert(*jiter);
1059 AddToHistoryIDWordMap(*jiter, word_id);
1060 }
1061 word_id_history_map_[word_id] = history_id_set;
1062 }
1063 return true;
1064 }
1065
1066 bool URLIndexPrivateData::RestoreHistoryInfoMap(
1067 const InMemoryURLIndexCacheItem& cache) {
1068 if (!cache.has_history_info_map())
1069 return false;
1070 const HistoryInfoMapItem& list_item(cache.history_info_map());
1071 uint32 expected_item_count = list_item.item_count();
1072 uint32 actual_item_count = list_item.history_info_map_entry_size();
1073 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1074 return false;
1075 const RepeatedPtrField<HistoryInfoMapEntry>&
1076 entries(list_item.history_info_map_entry());
1077 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =
1078 entries.begin(); iter != entries.end(); ++iter) {
1079 HistoryID history_id = iter->history_id();
1080 GURL url(iter->url());
1081 URLRow url_row(url, history_id);
1082 url_row.set_visit_count(iter->visit_count());
1083 url_row.set_typed_count(iter->typed_count());
1084 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
1085 if (iter->has_title()) {
1086 string16 title(UTF8ToUTF16(iter->title()));
1087 url_row.set_title(title);
1088 }
1089 history_info_map_[history_id] = url_row;
1090 }
1091 return true;
1092 }
1093
1094 bool URLIndexPrivateData::RestoreWordStartsMap(
1095 const InMemoryURLIndexCacheItem& cache,
1096 const std::string& languages) {
1097 // Note that this function must be called after RestoreHistoryInfoMap() has
1098 // been run as the word starts may have to be recalculated from the urls and
1099 // page titles.
1100 if (cache.has_word_starts_map()) {
1101 const WordStartsMapItem& list_item(cache.word_starts_map());
1102 uint32 expected_item_count = list_item.item_count();
1103 uint32 actual_item_count = list_item.word_starts_map_entry_size();
1104 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1105 return false;
1106 const RepeatedPtrField<WordStartsMapEntry>&
1107 entries(list_item.word_starts_map_entry());
1108 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
1109 entries.begin(); iter != entries.end(); ++iter) {
1110 HistoryID history_id = iter->history_id();
1111 RowWordStarts word_starts;
1112 // Restore the URL word starts.
1113 const RepeatedField<int32>& url_starts(iter->url_word_starts());
1114 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();
1115 jiter != url_starts.end(); ++jiter)
1116 word_starts.url_word_starts_.push_back(*jiter);
1117 // Restore the page title word starts.
1118 const RepeatedField<int32>& title_starts(iter->title_word_starts());
1119 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();
1120 jiter != title_starts.end(); ++jiter)
1121 word_starts.title_word_starts_.push_back(*jiter);
1122 word_starts_map_[history_id] = word_starts;
1123 }
1124 } else {
1125 // Since the cache did not contain any word starts we must rebuild then from
1126 // the URL and page titles.
1127 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
1128 iter != history_info_map_.end(); ++iter) {
1129 RowWordStarts word_starts;
1130 const URLRow& row(iter->second);
1131 string16 url(net::FormatUrl(row.url(), languages,
1132 net::kFormatUrlOmitUsernamePassword,
1133 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
1134 NULL, NULL, NULL));
1135 url = base::i18n::ToLower(url);
1136 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1137 String16VectorFromString16(
1138 row.title(), false, &word_starts.title_word_starts_);
1139 word_starts_map_[iter->first] = word_starts;
1140 }
1141 }
1142 return true;
1143 } 1083 }
1144 1084
1145 // static 1085 // static
1146 bool URLIndexPrivateData::URLSchemeIsWhitelisted( 1086 bool URLIndexPrivateData::URLSchemeIsWhitelisted(
1147 const GURL& gurl, 1087 const GURL& gurl,
1148 const std::set<std::string>& whitelist) { 1088 const std::set<std::string>& whitelist) {
1149 return whitelist.find(gurl.scheme()) != whitelist.end(); 1089 return whitelist.find(gurl.scheme()) != whitelist.end();
1150 } 1090 }
1151 1091
1152 } // namespace history 1092 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | chrome/browser/history/url_index_private_data_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698