chrome/browser/history/url_index_private_data.cc - Issue 10837244: Replace HistoryQuickProvider protobuf-based caching with an SQLite-based database.

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 10837244: Replace HistoryQuickProvider protobuf-based caching with an SQLite-based database. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Tweak suppression. Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <limits>	10 #include <limits>

11 #include <numeric>	11 #include <numeric>

12 #include <vector>	12 #include <vector>

13	13

14 #include "base/basictypes.h"	14 #include "base/basictypes.h"

15 #include "base/file_util.h"	15 #include "base/file_util.h"

16 #include "base/i18n/case_conversion.h"	16 #include "base/i18n/case_conversion.h"

17 #include "base/metrics/histogram.h"

18 #include "base/string_util.h"	17 #include "base/string_util.h"

19 #include "base/time.h"

20 #include "base/utf_string_conversions.h"	18 #include "base/utf_string_conversions.h"

21 #include "chrome/browser/autocomplete/autocomplete_provider.h"	19 #include "chrome/browser/autocomplete/autocomplete_provider.h"

22 #include "chrome/browser/autocomplete/url_prefix.h"

23 #include "chrome/browser/history/history_database.h"	20 #include "chrome/browser/history/history_database.h"

24 #include "chrome/browser/history/in_memory_url_index.h"	21 #include "chrome/browser/history/in_memory_url_cache_database.h"

	22 #include "chrome/common/chrome_constants.h"

	23 #include "chrome/common/url_constants.h"

25 #include "content/public/browser/browser_thread.h"	24 #include "content/public/browser/browser_thread.h"

26 #include "content/public/browser/notification_details.h"	25 #include "content/public/browser/notification_details.h"

27 #include "content/public/browser/notification_service.h"	26 #include "content/public/browser/notification_service.h"

28 #include "content/public/browser/notification_source.h"	27 #include "content/public/browser/notification_source.h"

29 #include "net/base/net_util.h"	28 #include "net/base/net_util.h"

30 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"	29

31	30 using content::BrowserThread;

32 using google::protobuf::RepeatedField;

33 using google::protobuf::RepeatedPtrField;

34 using in_memory_url_index::InMemoryURLIndexCacheItem;

35	31

36 namespace history {	32 namespace history {

37	33

38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;	34 // Comparison function for sorting search terms by descending length.

39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;	35 bool LengthGreater(const string16& string_a, const string16& string_b) {

40 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;	36 return string_a.length() > string_b.length();

41 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;	37 }

42 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry	38

43 CharWordMapEntry;	39 // Initializes a whitelist of URL schemes.

44 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem	40 void InitializeSchemeWhitelist(std::set<std::string>* whitelist) {

45 WordIDHistoryMapItem;	41 DCHECK(whitelist);

46 typedef imui::	42 if (!whitelist->empty())

47 InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry	43 return; // Nothing to do, already initialized.

48 WordIDHistoryMapEntry;	44 whitelist->insert(std::string(chrome::kAboutScheme));

49 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;	45 whitelist->insert(std::string(chrome::kChromeUIScheme));

50 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry	46 whitelist->insert(std::string(chrome::kFileScheme));

51 HistoryInfoMapEntry;	47 whitelist->insert(std::string(chrome::kFtpScheme));

52 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;	48 whitelist->insert(std::string(chrome::kHttpScheme));

53 typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry	49 whitelist->insert(std::string(chrome::kHttpsScheme));

54 WordStartsMapEntry;	50 whitelist->insert(std::string(chrome::kMailToScheme));

	51 }

	52

	53 // CacheTransaction ------------------------------------------------------------

	54

	55 // Simple automatic helper class encapsulating calls to BeginTransaction/

	56 // CommitTransaction.

	57 class CacheTransaction {

	58 public:

	59 explicit CacheTransaction(InMemoryURLCacheDatabase* db);

	60 ~CacheTransaction();

	61

	62 private:

	63 InMemoryURLCacheDatabase* db_;

	64

	65 DISALLOW_COPY_AND_ASSIGN(CacheTransaction);

	66 };

	67

	68 CacheTransaction::CacheTransaction(InMemoryURLCacheDatabase* db)

	69 : db_(db) {

	70 if (db_)

	71 db_->BeginTransaction();

	72 }

	73

	74 CacheTransaction::~CacheTransaction() {

	75 if (db_)

	76 db_->CommitTransaction();

	77 }

	78

	79 // URLIndexPrivateData Public (but only to InMemoryURLIndex) Functions ---------

	80

	81 URLIndexPrivateData::URLIndexPrivateData(const FilePath& history_dir,

	82 const std::string& languages)

	83 : history_dir_(history_dir),

	84 languages_(languages),

	85 cache_enabled_(true),

	86 shutdown_(false),

	87 lock_(),

	88 pre_filter_item_count_(0),

	89 post_filter_item_count_(0),

	90 post_scoring_item_count_(0) {

	91 InitializeSchemeWhitelist(&scheme_whitelist_);

	92 }

	93

	94 bool URLIndexPrivateData::Init(

	95 base::SequencedWorkerPool::SequenceToken sequence_token) {

	96 // Since init occurs on the DB thread, it is possible that the profile was

	97 // told to shutdown before this task had a chance to kick off.

	98 base::AutoLock lock(lock_);

	99 if (shutdown_)

	100 return false;

	101 cache_db_ = new InMemoryURLCacheDatabase();

	102 FilePath dir_path;

	103 set_cache_enabled(GetCacheDBPath(&dir_path) &&

	104 cache_db_->Init(dir_path, sequence_token));

	105 return cache_enabled();

	106 }

	107

	108 void URLIndexPrivateData::Reset() {

	109 Clear();

	110 if (cache_enabled())

	111 cache_db_->Reset();

	112 }

	113

	114 bool URLIndexPrivateData::Empty() const {

	115 return history_info_map_.empty();

	116 }

	117

	118 URLIndexPrivateData* URLIndexPrivateData::Snapshot() const {

	119 return new URLIndexPrivateData(*this);

	120 }

	121

	122 void URLIndexPrivateData::Shutdown() {

	123 base::AutoLock lock(lock_);

	124 shutdown_ = true;

	125 if (cache_enabled())

	126 cache_db_->Shutdown();

	127 }

	128

	129 // Helper predicates for ValidateConsistency.

	130 template <typename Pair>

	131 struct SelectFirst : std::unary_function<Pair, typename Pair::first_type> {

	132 const typename Pair::first_type& operator()(const Pair& x) const {

	133 return x.first;

	134 }

	135 };

	136

	137 template <typename Pair>

	138 struct SelectSecond : std::unary_function<Pair, typename Pair::second_type> {

	139 const typename Pair::second_type& operator()(const Pair& x) const {

	140 return x.second;

	141 }

	142 };

	143

	144 template <typename SourcePair, typename Target>

	145 struct TargetInserter : std::unary_function<SourcePair, void> {

	146 explicit TargetInserter(Target* target) : target_(target) {}

	147

	148 void operator()(const SourcePair& source) {

	149 target_->insert(source.second.begin(), source.second.end());

	150 }

	151

	152 Target* target_;

	153 };

	154

	155 bool URLIndexPrivateData::ValidateConsistency() const {

	156 // Scope things so that a large data set doesn't unnecessarily accumulate and

	157 // hang around.

	158 {

	159 // Make a set of WordIDs from word_map_.

	160 WordIDSet word_id_set_a;

	161 std::transform(word_map_.begin(), word_map_.end(),

	162 std::inserter(word_id_set_a, word_id_set_a.begin()),

	163 SelectSecond<WordMap::value_type>());

	164

	165 // Compare word_map_'s word set to the words from word_id_history_map_.

	166 {

	167 WordIDSet word_id_set_b;

	168 std::transform(word_id_history_map_.begin(), word_id_history_map_.end(),

	169 std::inserter(word_id_set_b, word_id_set_b.begin()),

	170 SelectFirst<WordIDHistoryMap::value_type>());

	171 WordIDSet leftovers;

	172 std::set_symmetric_difference(

	173 word_id_set_a.begin(), word_id_set_a.end(),

	174 word_id_set_b.begin(), word_id_set_b.end(),

	175 std::inserter(leftovers, leftovers.begin()));

	176 if (!leftovers.empty())

	177 return false;

	178 }

	179

	180 // Compare word_map_'s word set to the words from history_id_word_map_.

	181 {

	182 WordIDSet word_id_set_b;

	183 std::for_each(history_id_word_map_.begin(), history_id_word_map_.end(),

	184 TargetInserter<HistoryIDWordMap::value_type,

	185 WordIDSet>(&word_id_set_b));

	186 WordIDSet leftovers;

	187 std::set_symmetric_difference(

	188 word_id_set_a.begin(), word_id_set_a.end(),

	189 word_id_set_b.begin(), word_id_set_b.end(),

	190 std::inserter(leftovers, leftovers.begin()));

	191 if (!leftovers.empty())

	192 return false;

	193 }

	194

	195 // Compare word_map_'s word set to the words from char_word_map_.

	196 {

	197 WordIDSet word_id_set_b;

	198 std::for_each(char_word_map_.begin(), char_word_map_.end(),

	199 TargetInserter<CharWordIDMap::value_type,

	200 WordIDSet>(&word_id_set_b));

	201 WordIDSet leftovers;

	202 std::set_symmetric_difference(

	203 word_id_set_a.begin(), word_id_set_a.end(),

	204 word_id_set_b.begin(), word_id_set_b.end(),

	205 std::inserter(leftovers, leftovers.begin()));

	206 if (!leftovers.empty())

	207 return false;

	208 }

	209

	210 // Intersect available_words_ with set of WordIDs (created above from

	211 // word_map_) and test for no common WordIDs.

	212 {

	213 WordIDSet leftovers;

	214 std::set_intersection(word_id_set_a.begin(), word_id_set_a.end(),

	215 available_words_.begin(), available_words_.end(),

	216 std::inserter(leftovers, leftovers.begin()));

	217 if (!leftovers.empty())

	218 return false;

	219 }

	220 }

	221

	222 {

	223 // Make a set of HistoryIDs from history_info_map_.

	224 HistoryIDSet history_id_set_a;

	225 std::transform(history_info_map_.begin(), history_info_map_.end(),

	226 std::inserter(history_id_set_a, history_id_set_a.begin()),

	227 SelectFirst<HistoryInfoMap::value_type>());

	228

	229 // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.

	230 {

	231 HistoryIDSet history_id_set_b;

	232 std::transform(history_info_map_.begin(), history_info_map_.end(),

	233 std::inserter(history_id_set_b, history_id_set_b.begin()),

	234 SelectFirst<HistoryInfoMap::value_type>());

	235 HistoryIDSet leftovers;

	236 std::set_symmetric_difference(

	237 history_id_set_a.begin(), history_id_set_a.end(),

	238 history_id_set_b.begin(), history_id_set_b.end(),

	239 std::inserter(leftovers, leftovers.begin()));

	240 if (!leftovers.empty())

	241 return false;

	242 }

	243

	244 // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.

	245 {

	246 HistoryIDSet history_id_set_b;

	247 std::for_each(word_id_history_map_.begin(), word_id_history_map_.end(),

	248 TargetInserter<WordIDHistoryMap::value_type,

	249 HistoryIDSet>(&history_id_set_b));

	250 HistoryIDSet leftovers;

	251 std::set_symmetric_difference(

	252 history_id_set_a.begin(), history_id_set_a.end(),

	253 history_id_set_b.begin(), history_id_set_b.end(),

	254 std::inserter(leftovers, leftovers.begin()));

	255 if (!leftovers.empty())

	256 return false;

	257 }

	258

	259 // Compare history_info_map_'s set to word_starts_map_'s HistoryIDs.

	260 {

	261 HistoryIDSet history_id_set_b;

	262 std::transform(word_starts_map_.begin(), word_starts_map_.end(),

	263 std::inserter(history_id_set_b, history_id_set_b.begin()),

	264 SelectFirst<WordStartsMap::value_type>());

	265 HistoryIDSet leftovers;

	266 std::set_symmetric_difference(

	267 history_id_set_a.begin(), history_id_set_a.end(),

	268 history_id_set_b.begin(), history_id_set_b.end(),

	269 std::inserter(leftovers, leftovers.begin()));

	270 if (!leftovers.empty())

	271 return false;

	272 }

	273 }

	274

	275 // Make sets of words from word_list_ and word_map_ and test for equality.

	276 {

	277 String16Set word_list_set;

	278 std::copy(word_list_.begin(), word_list_.end(),

	279 std::inserter(word_list_set, word_list_set.end()));

	280 String16Set word_map_set;

	281 std::transform(word_map_.begin(), word_map_.end(),

	282 std::inserter(word_map_set, word_map_set.begin()),

	283 SelectFirst<WordMap::value_type>());

	284 if (word_list_set != word_map_set)

	285 return false;

	286 }

	287

	288 return true;

	289 }

	290

	291 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

	292 const string16& search_string) {

	293 pre_filter_item_count_ = 0;

	294 post_filter_item_count_ = 0;

	295 post_scoring_item_count_ = 0;

	296 // The search string we receive may contain escaped characters. For reducing

	297 // the index we need individual, lower-cased words, ignoring escapings. For

	298 // the final filtering we need whitespace separated substrings possibly

	299 // containing escaped characters.

	300 string16 lower_raw_string(base::i18n::ToLower(search_string));

	301 string16 lower_unescaped_string =

	302 net::UnescapeURLComponent(lower_raw_string,

	303 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);

	304 // Extract individual 'words' (as opposed to 'terms'; see below) from the

	305 // search string. When the user types "colspec=ID%20Mstone Release" we get

	306 // four 'words': "colspec", "id", "mstone" and "release".

	307 String16Vector lower_words(

	308 history::String16VectorFromString16(lower_unescaped_string, false, NULL));

	309 ScoredHistoryMatches scored_items;

	310

	311 // Do nothing if we have indexed no words (probably because we've not been

	312 // initialized yet) or the search string has no words.

	313 if (word_list_.empty() \|\| lower_words.empty()) {

	314 search_term_cache_.clear(); // Invalidate the term cache.

	315 return scored_items;

	316 }

	317

	318 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

	319 // approach.

	320 ResetSearchTermCache();

	321

	322 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

	323

	324 // Trim the candidate pool if it is large. Note that we do not filter out

	325 // items that do not contain the search terms as proper substrings -- doing

	326 // so is the performance-costly operation we are trying to avoid in order

	327 // to maintain omnibox responsiveness.

	328 const size_t kItemsToScoreLimit = 500;

	329 pre_filter_item_count_ = history_id_set.size();

	330 // If we trim the results set we do not want to cache the results for next

	331 // time as the user's ultimately desired result could easily be eliminated

	332 // in this early rough filter.

	333 bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

	334 if (was_trimmed) {

	335 HistoryIDVector history_ids;

	336 std::copy(history_id_set.begin(), history_id_set.end(),

	337 std::back_inserter(history_ids));

	338 // Trim down the set by sorting by typed-count, visit-count, and last

	339 // visit.

	340 HistoryItemFactorGreater

	341 item_factor_functor(history_info_map_);

	342 std::partial_sort(history_ids.begin(),

	343 history_ids.begin() + kItemsToScoreLimit,

	344 history_ids.end(),

	345 item_factor_functor);

	346 history_id_set.clear();

	347 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

	348 std::inserter(history_id_set, history_id_set.end()));

	349 post_filter_item_count_ = history_id_set.size();

	350 }

	351

	352 // Pass over all of the candidates filtering out any without a proper

	353 // substring match, inserting those which pass in order by score. Note that

	354 // in this step we are using the raw search string complete with escaped

	355 // URL elements. When the user has specifically typed something akin to

	356 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

	357 // specific substring appears in the URL or page title.

	358

	359 // We call these 'terms' (as opposed to 'words'; see above) as in this case

	360 // we only want to break up the search string on 'true' whitespace rather than

	361 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

	362 // get two 'terms': "colspec=id%20mstone" and "release".

	363 history::String16Vector lower_raw_terms;

	364 Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);

	365 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),

	366 AddHistoryMatch(*this, lower_raw_string,

	367 lower_raw_terms, base::Time::Now())).ScoredMatches();

	368

	369 // Select and sort only the top kMaxMatches results.

	370 if (scored_items.size() > AutocompleteProvider::kMaxMatches) {

	371 std::partial_sort(scored_items.begin(),

	372 scored_items.begin() +

	373 AutocompleteProvider::kMaxMatches,

	374 scored_items.end(),

	375 ScoredHistoryMatch::MatchScoreGreater);

	376 scored_items.resize(AutocompleteProvider::kMaxMatches);

	377 } else {

	378 std::sort(scored_items.begin(), scored_items.end(),

	379 ScoredHistoryMatch::MatchScoreGreater);

	380 }

	381 post_scoring_item_count_ = scored_items.size();

	382

	383 if (was_trimmed) {

	384 search_term_cache_.clear(); // Invalidate the term cache.

	385 } else {

	386 // Remove any stale SearchTermCacheItems.

	387 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

	388 cache_iter != search_term_cache_.end(); ) {

	389 if (!cache_iter->second.used_)

	390 search_term_cache_.erase(cache_iter++);

	391 else

	392 ++cache_iter;

	393 }

	394 }

	395

	396 return scored_items;

	397 }

	398

	399 bool URLIndexPrivateData::UpdateURL(const URLRow& row) {

	400 // The row may or may not already be in our index. If it is not already

	401 // indexed and it qualifies then it gets indexed. If it is already

	402 // indexed and still qualifies then it gets updated, otherwise it

	403 // is deleted from the index.

	404 bool row_was_updated = false;

	405 URLID row_id = row.id();

	406 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

	407 if (row_pos == history_info_map_.end()) {

	408 // This new row should be indexed if it qualifies.

	409 URLRow new_row(row);

	410 new_row.set_id(row_id);

	411 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

	412 IndexRow(new_row);

	413 } else if (RowQualifiesAsSignificant(row, base::Time())) {

	414 // This indexed row still qualifies and will be re-indexed.

	415 // The url won't have changed but the title, visit count, etc.

	416 // might have changed.

	417 URLRow& row_to_update = row_pos->second;

	418 bool title_updated = row_to_update.title() != row.title();

	419 if (row_to_update.visit_count() != row.visit_count() \|\|

	420 row_to_update.typed_count() != row.typed_count() \|\|

	421 row_to_update.last_visit() != row.last_visit() \|\| title_updated) {

	422 CacheTransaction cache_transaction(cache_enabled() ?

	423 cache_db_.get() : NULL);

	424 row_to_update.set_visit_count(row.visit_count());

	425 row_to_update.set_typed_count(row.typed_count());

	426 row_to_update.set_last_visit(row.last_visit());

	427 row_to_update.set_hidden(row.hidden());

	428 // While the URL is guaranteed to remain stable, the title may have

	429 // changed. If so, then update the index with the changed words.

	430 if (title_updated) {

	431 // Clear all words associated with this row and re-index both the

	432 // URL and title.

	433 RemoveRowWordsFromIndex(row_to_update);

	434 row_to_update.set_title(row.title());

	435 RowWordStarts word_starts;

	436 AddRowWordsToIndex(row_to_update, &word_starts);

	437 word_starts_map_[row_id] = word_starts;

	438 // Replace all word_starts for the row.

	439 if (cache_enabled()) {

	440 cache_db_->RemoveWordStarts(row_id);

	441 cache_db_->AddRowWordStarts(row_id, word_starts);

	442 }

	443 }

	444 if (cache_enabled()) {

	445 cache_db_->RemoveHistoryIDFromURLs(row_id);

	446 cache_db_->AddHistoryToURLs(row_id, row);

	447 }

	448 row_was_updated = true;

	449 }

	450 } else {

	451 // This indexed row no longer qualifies and will be de-indexed by

	452 // clearing all words associated with this row.

	453 RemoveRowFromIndex(row);

	454 row_was_updated = true;

	455 }

	456 if (row_was_updated)

	457 search_term_cache_.clear(); // This invalidates the cache.

	458 return row_was_updated;

	459 }

	460

	461 // Helper functor for DeleteURL.

	462 class HistoryInfoMapItemHasURL {

	463 public:

	464 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

	465

	466 bool operator()(const std::pair<const HistoryID, URLRow>& item) {

	467 return item.second.url() == url_;

	468 }

	469

	470 private:

	471 const GURL& url_;

	472 };

	473

	474 bool URLIndexPrivateData::DeleteURL(const GURL& url) {

	475 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) \|\|

	476 BrowserThread::CurrentlyOn(BrowserThread::UI));

	477 // Find the matching entry in the history_info_map_.

	478 HistoryInfoMap::iterator pos = std::find_if(

	479 history_info_map_.begin(),

	480 history_info_map_.end(),

	481 HistoryInfoMapItemHasURL(url));

	482 if (pos == history_info_map_.end())

	483 return false;

	484 RemoveRowFromIndex(pos->second);

	485 search_term_cache_.clear(); // This invalidates the cache.

	486 return true;

	487 }

	488

	489 bool URLIndexPrivateData::RestoreFromCacheTask() {

	490 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) \|\|

	491 !BrowserThread::CurrentlyOn(BrowserThread::UI));

	492 DCHECK(cache_db_);

	493 if (IsShutdown())

	494 return false;

	495 Clear();

	496 if (RestoreFromCache(cache_db_))

	497 return true;

	498 // If there was no SQLite-based cache then there might be an old,

	499 // deprecated, protobuf-based cache file laying around. Get rid of it.

	500 DeleteOldVersionCacheFile();

	501 return false;

	502 }

	503

	504 // static

	505 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

	506 HistoryDatabase* history_db,

	507 scoped_refptr<URLIndexPrivateData> old_data) {

	508 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) \|\|

	509 !BrowserThread::CurrentlyOn(BrowserThread::UI));

	510 if (!history_db)

	511 return NULL;

	512

	513 scoped_refptr<URLIndexPrivateData> rebuilt_data(

	514 new URLIndexPrivateData(*(old_data.get())));

	515

	516 // NOTE: We disable the cache database until after the replacement private

	517 // data has been created and then we re-enable the database. Once the private

	518 // data has been swapped in the database is refreshed with the new data.

	519 URLDatabase::URLEnumerator history_enum;

	520 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

	521 return NULL;

	522

	523 for (URLRow row; !old_data->IsShutdown() && history_enum.GetNextURL(&row); )

	524 rebuilt_data->IndexRow(row);

	525

	526 if (old_data->IsShutdown()) {

	527 rebuilt_data.release();

	528 }

	529

	530 return rebuilt_data;

	531 }

	532

	533 void URLIndexPrivateData::RefreshCacheTask() {

	534 DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) \|\|

	535 !BrowserThread::CurrentlyOn(BrowserThread::UI));

	536 base::AutoLock lock(lock_);

	537 if (shutdown_ \|\| !cache_enabled())

	538 return;

	539 // Should a refresh fail for any reason we consider the database to be

	540 // corrupt and unavailable; no further remedial action is possible.

	541 set_cache_enabled(cache_db_->Reset() && cache_db_->Refresh(*this));

	542 }

	543

	544 // static

	545 void URLIndexPrivateData::InitializeSchemeWhitelistForTesting(

	546 std::set<std::string>* whitelist) {

	547 InitializeSchemeWhitelist(whitelist);

	548 }

55	549

56 // SearchTermCacheItem ---------------------------------------------------------	550 // SearchTermCacheItem ---------------------------------------------------------

57	551

58 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(	552 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(

59 const WordIDSet& word_id_set,	553 const WordIDSet& word_id_set,

60 const HistoryIDSet& history_id_set)	554 const HistoryIDSet& history_id_set)

61 : word_id_set_(word_id_set),	555 : word_id_set_(word_id_set),

62 history_id_set_(history_id_set),	556 history_id_set_(history_id_set),

63 used_(true) {}	557 used_(true) {}

64	558

65 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()	559 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()

66 : used_(true) {}	560 : used_(true) {}

67	561

68 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}	562 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}

69	563

70 // Algorithm Functions ---------------------------------------------------------	564 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------

71	565

72 // Comparison function for sorting search terms by descending length.	566 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

73 bool LengthGreater(const string16& string_a, const string16& string_b) {	567 const URLIndexPrivateData& private_data,

74 return string_a.length() > string_b.length();	568 const string16& lower_string,

	569 const String16Vector& lower_terms,

	570 const base::Time now)

	571 : private_data_(private_data),

	572 lower_string_(lower_string),

	573 lower_terms_(lower_terms),

	574 now_(now) {}

	575

	576 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}

	577

	578 void URLIndexPrivateData::AddHistoryMatch::operator()(

	579 const HistoryID history_id) {

	580 HistoryInfoMap::const_iterator hist_pos =

	581 private_data_.history_info_map_.find(history_id);

	582 if (hist_pos != private_data_.history_info_map_.end()) {

	583 const URLRow& hist_item = hist_pos->second;

	584 WordStartsMap::const_iterator starts_pos =

	585 private_data_.word_starts_map_.find(history_id);

	586 DCHECK(starts_pos != private_data_.word_starts_map_.end());

	587 ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,

	588 starts_pos->second, now_);

	589 if (match.raw_score > 0)

	590 scored_matches_.push_back(match);

	591 }

75 }	592 }

76	593

77 // InMemoryURLIndex's Private Data ---------------------------------------------	594 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

78	595

79 URLIndexPrivateData::URLIndexPrivateData()	596 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

80 : restored_cache_version_(0),	597 const HistoryInfoMap& history_info_map)

81 saved_cache_version_(kCurrentCacheFileVersion),	598 : history_info_map_(history_info_map) {

	599 }

	600

	601 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}

	602

	603 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

	604 const HistoryID h1,

	605 const HistoryID h2) {

	606 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

	607 if (entry1 == history_info_map_.end())

	608 return false;

	609 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

	610 if (entry2 == history_info_map_.end())

	611 return true;

	612 const URLRow& r1(entry1->second);

	613 const URLRow& r2(entry2->second);

	614 // First cut: typed count, visit count, recency.

	615 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

	616 // recently visited (within the last 12/24 hours) as highly important. Get

	617 // input from mpearson.

	618 if (r1.typed_count() != r2.typed_count())

	619 return (r1.typed_count() > r2.typed_count());

	620 if (r1.visit_count() != r2.visit_count())

	621 return (r1.visit_count() > r2.visit_count());

	622 return (r1.last_visit() > r2.last_visit());

	623 }

	624

	625 // URLIndexPrivateData Private Functions ---------------------------------------

	626

	627 URLIndexPrivateData::URLIndexPrivateData(const URLIndexPrivateData& old_data)

	628 : history_dir_(old_data.history_dir_),

	629 languages_(old_data.languages_),

	630 cache_enabled_(old_data.cache_enabled_),

	631 shutdown_(old_data.IsShutdown()),

	632 lock_(),

	633 scheme_whitelist_(old_data.scheme_whitelist_),

82 pre_filter_item_count_(0),	634 pre_filter_item_count_(0),

83 post_filter_item_count_(0),	635 post_filter_item_count_(0),

84 post_scoring_item_count_(0) {	636 post_scoring_item_count_(0) {

	637 cache_db_ = old_data.cache_db_;

	638 }

	639

	640 // Called only by unit tests.

	641 URLIndexPrivateData::URLIndexPrivateData()

	642 : cache_enabled_(true),

	643 shutdown_(false),

	644 lock_(),

	645 pre_filter_item_count_(0),

	646 post_filter_item_count_(0),

	647 post_scoring_item_count_(0) {

	648 InitializeSchemeWhitelist(&scheme_whitelist_);

85 }	649 }

86	650

87 URLIndexPrivateData::~URLIndexPrivateData() {}	651 URLIndexPrivateData::~URLIndexPrivateData() {}

88	652

	653 bool URLIndexPrivateData::IsShutdown() const {

	654 base::AutoLock lock(lock_);

	655 return shutdown_;
	sky 2012/08/22 19:42:34 nit: spacing. nit: spacing.
	656 }

	657

89 void URLIndexPrivateData::Clear() {	658 void URLIndexPrivateData::Clear() {

90 word_list_.clear();	659 word_list_.clear();

91 available_words_.clear();	660 available_words_.clear();

92 word_map_.clear();	661 word_map_.clear();

93 char_word_map_.clear();	662 char_word_map_.clear();

94 word_id_history_map_.clear();	663 word_id_history_map_.clear();

95 history_id_word_map_.clear();	664 history_id_word_map_.clear();

96 history_info_map_.clear();	665 history_info_map_.clear();

97 word_starts_map_.clear();	666 word_starts_map_.clear();

98 }	667 }

99	668

100 bool URLIndexPrivateData::Empty() const {

101 return history_info_map_.empty();

102 }

103

104 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {

105 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;

106 data_copy->word_list_ = word_list_;

107 data_copy->available_words_ = available_words_;

108 data_copy->word_map_ = word_map_;

109 data_copy->char_word_map_ = char_word_map_;

110 data_copy->word_id_history_map_ = word_id_history_map_;

111 data_copy->history_id_word_map_ = history_id_word_map_;

112 data_copy->history_info_map_ = history_info_map_;

113 data_copy->word_starts_map_ = word_starts_map_;

114 return data_copy;

115 // Not copied:

116 // search_term_cache_

117 // pre_filter_item_count_

118 // post_filter_item_count_

119 // post_scoring_item_count_

120 };

121

122 // Cache Updating --------------------------------------------------------------	669 // Cache Updating --------------------------------------------------------------

123	670

124 bool URLIndexPrivateData::IndexRow(	671 bool URLIndexPrivateData::IndexRow(const URLRow& row) {

125 const URLRow& row,

126 const std::string& languages,

127 const std::set<std::string>& scheme_whitelist) {

128 const GURL& gurl(row.url());	672 const GURL& gurl(row.url());

129	673

130 // Index only URLs with a whitelisted scheme.	674 // Index only URLs with a whitelisted scheme.

131 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))	675 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist_))

132 return false;	676 return false;

133	677

134 URLID row_id = row.id();

135 // Strip out username and password before saving and indexing.	678 // Strip out username and password before saving and indexing.

136 string16 url(net::FormatUrl(gurl, languages,	679 string16 url(net::FormatUrl(gurl, languages_,

137 net::kFormatUrlOmitUsernamePassword,	680 net::kFormatUrlOmitUsernamePassword,

138 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,	681 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

139 NULL, NULL, NULL));	682 NULL, NULL, NULL));

140	683

	684 URLID row_id = row.id();

141 HistoryID history_id = static_cast<HistoryID>(row_id);	685 HistoryID history_id = static_cast<HistoryID>(row_id);

	686 DCHECK(history_info_map_.find(history_id) == history_info_map_.end());

142 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());	687 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

143	688

144 // Add the row for quick lookup in the history info store.	689 // Add the row for quick lookup in the history info store.

145 URLRow new_row(GURL(url), row_id);	690 URLRow new_row(GURL(url), row_id);

146 new_row.set_visit_count(row.visit_count());	691 new_row.set_visit_count(row.visit_count());

147 new_row.set_typed_count(row.typed_count());	692 new_row.set_typed_count(row.typed_count());

148 new_row.set_last_visit(row.last_visit());	693 new_row.set_last_visit(row.last_visit());

149 new_row.set_title(row.title());	694 new_row.set_title(row.title());

	695

	696 CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);

150 history_info_map_[history_id] = new_row;	697 history_info_map_[history_id] = new_row;

	698 if (cache_enabled())

	699 cache_db_->AddHistoryToURLs(history_id, row);

151	700

152 // Index the words contained in the URL and title of the row.	701 // Index the words contained in the URL and title of the row.

153 RowWordStarts word_starts;	702 RowWordStarts word_starts;

154 AddRowWordsToIndex(new_row, &word_starts, languages);	703 AddRowWordsToIndex(new_row, &word_starts);

155 word_starts_map_[history_id] = word_starts;	704 word_starts_map_[history_id] = word_starts;

	705 if (cache_enabled())

	706 cache_db_->AddRowWordStarts(history_id, word_starts);

156 return true;	707 return true;

157 }	708 }

158	709

159 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,	710 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,

160 RowWordStarts* word_starts,	711 RowWordStarts* word_starts) {

161 const std::string& languages) {

162 HistoryID history_id = static_cast<HistoryID>(row.id());	712 HistoryID history_id = static_cast<HistoryID>(row.id());

163 // Split URL into individual, unique words then add in the title words.	713 // Split URL into individual, unique words then add in the title words.

164 const GURL& gurl(row.url());	714 const GURL& gurl(row.url());

165 string16 url(net::FormatUrl(gurl, languages,	715 string16 url(net::FormatUrl(gurl, languages_,

166 net::kFormatUrlOmitUsernamePassword,	716 net::kFormatUrlOmitUsernamePassword,

167 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,	717 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

168 NULL, NULL, NULL));	718 NULL, NULL, NULL));

169 url = base::i18n::ToLower(url);	719 url = base::i18n::ToLower(url);

170 String16Set url_words = String16SetFromString16(url,	720 String16Set url_words = String16SetFromString16(url,

171 word_starts ? &word_starts->url_word_starts_ : NULL);	721 word_starts ? &word_starts->url_word_starts_ : NULL);

172 String16Set title_words = String16SetFromString16(row.title(),	722 String16Set title_words = String16SetFromString16(row.title(),

173 word_starts ? &word_starts->title_word_starts_ : NULL);	723 word_starts ? &word_starts->title_word_starts_ : NULL);

174 String16Set words;	724 String16Set words;

175 std::set_union(url_words.begin(), url_words.end(),	725 std::set_union(url_words.begin(), url_words.end(),

(...skipping 12 matching lines...) Expand all Loading...
188 if (word_pos != word_map_.end())	738 if (word_pos != word_map_.end())

189 UpdateWordHistory(word_pos->second, history_id);	739 UpdateWordHistory(word_pos->second, history_id);

190 else	740 else

191 AddWordHistory(term, history_id);	741 AddWordHistory(term, history_id);

192 }	742 }

193	743

194 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,	744 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,

195 HistoryID history_id) {	745 HistoryID history_id) {

196 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);	746 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);

197 DCHECK(history_pos != word_id_history_map_.end());	747 DCHECK(history_pos != word_id_history_map_.end());

	748 // There is no need to record changes to the word_id_history_map_ in the cache

	749 // because it can be rebuilt from the history_id_word_map_'s table.

198 HistoryIDSet& history_id_set(history_pos->second);	750 HistoryIDSet& history_id_set(history_pos->second);

199 history_id_set.insert(history_id);	751 history_id_set.insert(history_id);

200 AddToHistoryIDWordMap(history_id, word_id);	752 AddToHistoryIDWordMap(history_id, word_id);

	753

	754 // Add word_id/history_id entry to the word_history table

	755 if (cache_enabled())

	756 cache_db_->AddHistoryToWordHistory(word_id, history_id);

201 }	757 }

202	758

203 void URLIndexPrivateData::AddWordHistory(const string16& term,	759 void URLIndexPrivateData::AddWordHistory(const string16& term,

204 HistoryID history_id) {	760 HistoryID history_id) {

205 WordID word_id = word_list_.size();	761 WordID word_id = word_list_.size();

206 if (available_words_.empty()) {	762 if (available_words_.empty()) {

207 word_list_.push_back(term);	763 word_list_.push_back(term);

208 } else {	764 } else {

209 word_id = *(available_words_.begin());	765 word_id = *(available_words_.begin());

210 word_list_[word_id] = term;	766 word_list_[word_id] = term;

211 available_words_.erase(word_id);	767 available_words_.erase(word_id);

212 }	768 }

213 word_map_[term] = word_id;	769 word_map_[term] = word_id;

214	770

	771 // Add word_id/term entry to the words table;

	772 if (cache_enabled())

	773 cache_db_->AddWordToWords(word_id, term);

	774

215 HistoryIDSet history_id_set;	775 HistoryIDSet history_id_set;

216 history_id_set.insert(history_id);	776 history_id_set.insert(history_id);

217 word_id_history_map_[word_id] = history_id_set;	777 word_id_history_map_[word_id] = history_id_set;

218 AddToHistoryIDWordMap(history_id, word_id);	778 AddToHistoryIDWordMap(history_id, word_id);

219	779

	780 // Add word_id/history_id entry to the word_history table

	781 if (cache_enabled())

	782 cache_db_->AddHistoryToWordHistory(word_id, history_id);

	783

220 // For each character in the newly added word (i.e. a word that is not	784 // For each character in the newly added word (i.e. a word that is not

221 // already in the word index), add the word to the character index.	785 // already in the word index), add the word to the character index.

222 Char16Set characters = Char16SetFromString16(term);	786 Char16Set characters = Char16SetFromString16(term);

223 for (Char16Set::iterator uni_char_iter = characters.begin();	787 for (Char16Set::iterator uni_char_iter = characters.begin();

224 uni_char_iter != characters.end(); ++uni_char_iter) {	788 uni_char_iter != characters.end(); ++uni_char_iter) {

225 char16 uni_char = *uni_char_iter;	789 char16 uni_char = *uni_char_iter;

226 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);	790 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);

227 if (char_iter != char_word_map_.end()) {	791 if (char_iter != char_word_map_.end()) {

228 // Update existing entry in the char/word index.	792 // Update existing entry in the char/word index.

229 WordIDSet& word_id_set(char_iter->second);	793 WordIDSet& word_id_set(char_iter->second);

230 word_id_set.insert(word_id);	794 word_id_set.insert(word_id);

231 } else {	795 } else {

232 // Create a new entry in the char/word index.	796 // Create a new entry in the char/word index.

233 WordIDSet word_id_set;	797 WordIDSet word_id_set;

234 word_id_set.insert(word_id);	798 word_id_set.insert(word_id);

235 char_word_map_[uni_char] = word_id_set;	799 char_word_map_[uni_char] = word_id_set;

236 }	800 }

	801 // Add uni_char/word_id entry to the char_words database table.

	802 if (cache_enabled())

	803 cache_db_->AddWordToCharWords(uni_char, word_id);

237 }	804 }

238 }	805 }

239	806

240 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {	807 void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {

	808 CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);

241 RemoveRowWordsFromIndex(row);	809 RemoveRowWordsFromIndex(row);

242 HistoryID history_id = static_cast<HistoryID>(row.id());	810 HistoryID history_id = static_cast<HistoryID>(row.id());

243 history_info_map_.erase(history_id);	811 history_info_map_.erase(history_id);

244 word_starts_map_.erase(history_id);	812 word_starts_map_.erase(history_id);

	813 if (cache_enabled())

	814 cache_db_->RemoveHistoryIDFromURLs(history_id);

245 }	815 }

246	816

247 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {	817 void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {

248 // Remove the entries in history_id_word_map_ and word_id_history_map_ for	818 // Remove the entries in history_id_word_map_ and word_id_history_map_ for

249 // this row.	819 // this row.

250 HistoryID history_id = static_cast<HistoryID>(row.id());	820 HistoryID history_id = static_cast<HistoryID>(row.id());

251 WordIDSet word_id_set = history_id_word_map_[history_id];	821 WordIDSet word_id_set = history_id_word_map_[history_id];

252 history_id_word_map_.erase(history_id);	822 history_id_word_map_.erase(history_id);

253	823

254 // Reconcile any changes to word usage.	824 // Reconcile any changes to word usage.

(...skipping 13 matching lines...) Expand all Loading...
268 char_word_map_[uni_char].erase(word_id);	838 char_word_map_[uni_char].erase(word_id);

269 if (char_word_map_[uni_char].empty())	839 if (char_word_map_[uni_char].empty())

270 char_word_map_.erase(uni_char); // No longer in use.	840 char_word_map_.erase(uni_char); // No longer in use.

271 }	841 }

272	842

273 // Complete the removal of references to the word.	843 // Complete the removal of references to the word.

274 word_id_history_map_.erase(word_id);	844 word_id_history_map_.erase(word_id);

275 word_map_.erase(word);	845 word_map_.erase(word);

276 word_list_[word_id] = string16();	846 word_list_[word_id] = string16();

277 available_words_.insert(word_id);	847 available_words_.insert(word_id);

	848 if (cache_enabled())

	849 cache_db_->RemoveWordFromWords(word_id);

278 }	850 }

	851 if (cache_enabled())

	852 cache_db_->RemoveHistoryIDFromWordHistory(history_id);

279 }	853 }

280	854

281 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,	855 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,

282 WordID word_id) {	856 WordID word_id) {

283 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);	857 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);

284 if (iter != history_id_word_map_.end()) {	858 if (iter != history_id_word_map_.end()) {

285 WordIDSet& word_id_set(iter->second);	859 WordIDSet& word_id_set(iter->second);

286 word_id_set.insert(word_id);	860 word_id_set.insert(word_id);

287 } else {	861 } else {

288 WordIDSet word_id_set;	862 WordIDSet word_id_set;

289 word_id_set.insert(word_id);	863 word_id_set.insert(word_id);

290 history_id_word_map_[history_id] = word_id_set;	864 history_id_word_map_[history_id] = word_id_set;

291 }	865 }

292 }	866 }

293	867

294 bool URLIndexPrivateData::UpdateURL(

295 const URLRow& row,

296 const std::string& languages,

297 const std::set<std::string>& scheme_whitelist) {

298 // The row may or may not already be in our index. If it is not already

299 // indexed and it qualifies then it gets indexed. If it is already

300 // indexed and still qualifies then it gets updated, otherwise it

301 // is deleted from the index.

302 bool row_was_updated = false;

303 URLID row_id = row.id();

304 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

305 if (row_pos == history_info_map_.end()) {

306 // This new row should be indexed if it qualifies.

307 URLRow new_row(row);

308 new_row.set_id(row_id);

309 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

310 IndexRow(new_row, languages, scheme_whitelist);

311 } else if (RowQualifiesAsSignificant(row, base::Time())) {

312 // This indexed row still qualifies and will be re-indexed.

313 // The url won't have changed but the title, visit count, etc.

314 // might have changed.

315 URLRow& row_to_update = row_pos->second;

316 bool title_updated = row_to_update.title() != row.title();

317 if (row_to_update.visit_count() != row.visit_count() \|\|

318 row_to_update.typed_count() != row.typed_count() \|\|

319 row_to_update.last_visit() != row.last_visit() \|\| title_updated) {

320 row_to_update.set_visit_count(row.visit_count());

321 row_to_update.set_typed_count(row.typed_count());

322 row_to_update.set_last_visit(row.last_visit());

323 // While the URL is guaranteed to remain stable, the title may have

324 // changed. If so, then update the index with the changed words.

325 if (title_updated) {

326 // Clear all words associated with this row and re-index both the

327 // URL and title.

328 RemoveRowWordsFromIndex(row_to_update);

329 row_to_update.set_title(row.title());

330 RowWordStarts word_starts;

331 AddRowWordsToIndex(row_to_update, &word_starts, languages);

332 word_starts_map_[row_id] = word_starts;

333 }

334 row_was_updated = true;

335 }

336 } else {

337 // This indexed row no longer qualifies and will be de-indexed by

338 // clearing all words associated with this row.

339 RemoveRowFromIndex(row);

340 row_was_updated = true;

341 }

342 if (row_was_updated)

343 search_term_cache_.clear(); // This invalidates the cache.

344 return row_was_updated;

345 }

346

347 // Helper functor for DeleteURL.

348 class HistoryInfoMapItemHasURL {

349 public:

350 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

351

352 bool operator()(const std::pair<const HistoryID, URLRow>& item) {

353 return item.second.url() == url_;

354 }

355

356 private:

357 const GURL& url_;

358 };

359

360 bool URLIndexPrivateData::DeleteURL(const GURL& url) {

361 // Find the matching entry in the history_info_map_.

362 HistoryInfoMap::iterator pos = std::find_if(

363 history_info_map_.begin(),

364 history_info_map_.end(),

365 HistoryInfoMapItemHasURL(url));

366 if (pos == history_info_map_.end())

367 return false;

368 RemoveRowFromIndex(pos->second);

369 search_term_cache_.clear(); // This invalidates the cache.

370 return true;

371 }

372

373 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

374

375 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

376 const HistoryInfoMap& history_info_map)

377 : history_info_map_(history_info_map) {

378 }

379

380 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}

381

382 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

383 const HistoryID h1,

384 const HistoryID h2) {

385 HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

386 if (entry1 == history_info_map_.end())

387 return false;

388 HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

389 if (entry2 == history_info_map_.end())

390 return true;

391 const URLRow& r1(entry1->second);

392 const URLRow& r2(entry2->second);

393 // First cut: typed count, visit count, recency.

394 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

395 // recently visited (within the last 12/24 hours) as highly important. Get

396 // input from mpearson.

397 if (r1.typed_count() != r2.typed_count())

398 return (r1.typed_count() > r2.typed_count());

399 if (r1.visit_count() != r2.visit_count())

400 return (r1.visit_count() > r2.visit_count());

401 return (r1.last_visit() > r2.last_visit());

402 }

403

404 // Cache Searching -------------------------------------------------------------

405

406 // NOTE: This is the main public search function.

407 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

408 const string16& search_string) {

409 pre_filter_item_count_ = 0;

410 post_filter_item_count_ = 0;

411 post_scoring_item_count_ = 0;

412 // The search string we receive may contain escaped characters. For reducing

413 // the index we need individual, lower-cased words, ignoring escapings. For

414 // the final filtering we need whitespace separated substrings possibly

415 // containing escaped characters.

416 string16 lower_raw_string(base::i18n::ToLower(search_string));

417 string16 lower_unescaped_string =

418 net::UnescapeURLComponent(lower_raw_string,

419 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);

420 // Extract individual 'words' (as opposed to 'terms'; see below) from the

421 // search string. When the user types "colspec=ID%20Mstone Release" we get

422 // four 'words': "colspec", "id", "mstone" and "release".

423 String16Vector lower_words(

424 history::String16VectorFromString16(lower_unescaped_string, false, NULL));

425 ScoredHistoryMatches scored_items;

426

427 // Do nothing if we have indexed no words (probably because we've not been

428 // initialized yet) or the search string has no words.

429 if (word_list_.empty() \|\| lower_words.empty()) {

430 search_term_cache_.clear(); // Invalidate the term cache.

431 return scored_items;

432 }

433

434 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

435 // approach.

436 ResetSearchTermCache();

437

438 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

439

440 // Trim the candidate pool if it is large. Note that we do not filter out

441 // items that do not contain the search terms as proper substrings -- doing

442 // so is the performance-costly operation we are trying to avoid in order

443 // to maintain omnibox responsiveness.

444 const size_t kItemsToScoreLimit = 500;

445 pre_filter_item_count_ = history_id_set.size();

446 // If we trim the results set we do not want to cache the results for next

447 // time as the user's ultimately desired result could easily be eliminated

448 // in this early rough filter.

449 bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

450 if (was_trimmed) {

451 HistoryIDVector history_ids;

452 std::copy(history_id_set.begin(), history_id_set.end(),

453 std::back_inserter(history_ids));

454 // Trim down the set by sorting by typed-count, visit-count, and last

455 // visit.

456 HistoryItemFactorGreater

457 item_factor_functor(history_info_map_);

458 std::partial_sort(history_ids.begin(),

459 history_ids.begin() + kItemsToScoreLimit,

460 history_ids.end(),

461 item_factor_functor);

462 history_id_set.clear();

463 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

464 std::inserter(history_id_set, history_id_set.end()));

465 post_filter_item_count_ = history_id_set.size();

466 }

467

468 // Pass over all of the candidates filtering out any without a proper

469 // substring match, inserting those which pass in order by score. Note that

470 // in this step we are using the raw search string complete with escaped

471 // URL elements. When the user has specifically typed something akin to

472 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

473 // specific substring appears in the URL or page title.

474

475 // We call these 'terms' (as opposed to 'words'; see above) as in this case

476 // we only want to break up the search string on 'true' whitespace rather than

477 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

478 // get two 'terms': "colspec=id%20mstone" and "release".

479 history::String16Vector lower_raw_terms;

480 Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);

481 scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),

482 AddHistoryMatch(*this, lower_raw_string,

483 lower_raw_terms, base::Time::Now())).ScoredMatches();

484

485 // Select and sort only the top kMaxMatches results.

486 if (scored_items.size() > AutocompleteProvider::kMaxMatches) {

487 std::partial_sort(scored_items.begin(),

488 scored_items.begin() +

489 AutocompleteProvider::kMaxMatches,

490 scored_items.end(),

491 ScoredHistoryMatch::MatchScoreGreater);

492 scored_items.resize(AutocompleteProvider::kMaxMatches);

493 } else {

494 std::sort(scored_items.begin(), scored_items.end(),

495 ScoredHistoryMatch::MatchScoreGreater);

496 }

497 post_scoring_item_count_ = scored_items.size();

498

499 if (was_trimmed) {

500 search_term_cache_.clear(); // Invalidate the term cache.

501 } else {

502 // Remove any stale SearchTermCacheItems.

503 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

504 cache_iter != search_term_cache_.end(); ) {

505 if (!cache_iter->second.used_)

506 search_term_cache_.erase(cache_iter++);

507 else

508 ++cache_iter;

509 }

510 }

511

512 return scored_items;

513 }

514

515 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------

516

517 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

518 const URLIndexPrivateData& private_data,

519 const string16& lower_string,

520 const String16Vector& lower_terms,

521 const base::Time now)

522 : private_data_(private_data),

523 lower_string_(lower_string),

524 lower_terms_(lower_terms),

525 now_(now) {}

526

527 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}

528

529 void URLIndexPrivateData::AddHistoryMatch::operator()(

530 const HistoryID history_id) {

531 HistoryInfoMap::const_iterator hist_pos =

532 private_data_.history_info_map_.find(history_id);

533 if (hist_pos != private_data_.history_info_map_.end()) {

534 const URLRow& hist_item = hist_pos->second;

535 WordStartsMap::const_iterator starts_pos =

536 private_data_.word_starts_map_.find(history_id);

537 DCHECK(starts_pos != private_data_.word_starts_map_.end());

538 ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,

539 starts_pos->second, now_);

540 if (match.raw_score > 0)

541 scored_matches_.push_back(match);

542 }

543 }

544

545 void URLIndexPrivateData::ResetSearchTermCache() {	868 void URLIndexPrivateData::ResetSearchTermCache() {

546 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();	869 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();

547 iter != search_term_cache_.end(); ++iter)	870 iter != search_term_cache_.end(); ++iter)

548 iter->second.used_ = false;	871 iter->second.used_ = false;

549 }	872 }

550	873

551 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(	874 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(

552 const String16Vector& unsorted_words) {	875 const String16Vector& unsorted_words) {

553 // Break the terms down into individual terms (words), get the candidate	876 // Break the terms down into individual terms (words), get the candidate

554 // set for each term, and intersect each to get a final candidate list.	877 // set for each term, and intersect each to get a final candidate list.

(...skipping 168 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
723 std::set_intersection(word_id_set.begin(), word_id_set.end(),	1046 std::set_intersection(word_id_set.begin(), word_id_set.end(),

724 char_word_id_set.begin(), char_word_id_set.end(),	1047 char_word_id_set.begin(), char_word_id_set.end(),

725 std::inserter(new_word_id_set,	1048 std::inserter(new_word_id_set,

726 new_word_id_set.begin()));	1049 new_word_id_set.begin()));

727 word_id_set.swap(new_word_id_set);	1050 word_id_set.swap(new_word_id_set);

728 }	1051 }

729 }	1052 }

730 return word_id_set;	1053 return word_id_set;

731 }	1054 }

732	1055

733 // Cache Saving ----------------------------------------------------------------	1056 bool URLIndexPrivateData::RestoreFromCache(InMemoryURLCacheDatabase* cache_db) {

734	1057 DCHECK(cache_db);

735 // static	1058 if (!cache_db->RestorePrivateData(this)) {

736 void URLIndexPrivateData::WritePrivateDataToCacheFileTask(	1059 cache_db->Reset();

737 scoped_refptr<URLIndexPrivateData> private_data,	1060 return false;

738 const FilePath& file_path,	1061 }

739 scoped_refptr<RefCountedBool> succeeded) {	1062 return !Empty() && ValidateConsistency();

740 DCHECK(private_data.get());

741 DCHECK(!file_path.empty());

742 succeeded->set_value(private_data->SaveToFile(file_path));

743 }	1063 }

744	1064

745 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {	1065 void URLIndexPrivateData::DeleteOldVersionCacheFile() const {

746 base::TimeTicks beginning_time = base::TimeTicks::Now();	1066 if (history_dir_.empty())

747 InMemoryURLIndexCacheItem index_cache;	1067 return;

748 SavePrivateData(&index_cache);	1068 FilePath path = history_dir_.Append(chrome::kHQPCacheFilename);

749 std::string data;	1069 file_util::Delete(path, false);

750 if (!index_cache.SerializeToString(&data)) {	1070 }

751 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";	1071

	1072 bool URLIndexPrivateData::GetCacheDBPath(FilePath* file_path) {

	1073 DCHECK(file_path);

	1074 if (history_dir_.empty())

752 return false;	1075 return false;

753 }	1076 *file_path = history_dir_.Append(chrome::kHQPCacheDBFilename);

754

755 int size = data.size();

756 if (file_util::WriteFile(file_path, data.c_str(), size) != size) {

757 LOG(WARNING) << "Failed to write " << file_path.value();

758 return false;

759 }

760 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",

761 base::TimeTicks::Now() - beginning_time);

762 return true;	1077 return true;

763 }	1078 }

764	1079

765 void URLIndexPrivateData::SavePrivateData(	1080 void URLIndexPrivateData::SetCacheDatabaseForTesting(

766 InMemoryURLIndexCacheItem* cache) const {	1081 InMemoryURLCacheDatabase* test_db) {

767 DCHECK(cache);	1082 cache_db_ = test_db;

768 cache->set_timestamp(base::Time::Now().ToInternalValue());

769 cache->set_version(saved_cache_version_);

770 // history_item_count_ is no longer used but rather than change the protobuf

771 // definition use a placeholder. This will go away with the switch to SQLite.

772 cache->set_history_item_count(0);

773 SaveWordList(cache);

774 SaveWordMap(cache);

775 SaveCharWordMap(cache);

776 SaveWordIDHistoryMap(cache);

777 SaveHistoryInfoMap(cache);

778 SaveWordStartsMap(cache);

779 }

780

781 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

782 if (word_list_.empty())

783 return;

784 WordListItem* list_item = cache->mutable_word_list();

785 list_item->set_word_count(word_list_.size());

786 for (String16Vector::const_iterator iter = word_list_.begin();

787 iter != word_list_.end(); ++iter)

788 list_item->add_word(UTF16ToUTF8(*iter));

789 }

790

791 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {

792 if (word_map_.empty())

793 return;

794 WordMapItem* map_item = cache->mutable_word_map();

795 map_item->set_item_count(word_map_.size());

796 for (WordMap::const_iterator iter = word_map_.begin();

797 iter != word_map_.end(); ++iter) {

798 WordMapEntry* map_entry = map_item->add_word_map_entry();

799 map_entry->set_word(UTF16ToUTF8(iter->first));

800 map_entry->set_word_id(iter->second);

801 }

802 }

803

804 void URLIndexPrivateData::SaveCharWordMap(

805 InMemoryURLIndexCacheItem* cache) const {

806 if (char_word_map_.empty())

807 return;

808 CharWordMapItem* map_item = cache->mutable_char_word_map();

809 map_item->set_item_count(char_word_map_.size());

810 for (CharWordIDMap::const_iterator iter = char_word_map_.begin();

811 iter != char_word_map_.end(); ++iter) {

812 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();

813 map_entry->set_char_16(iter->first);

814 const WordIDSet& word_id_set(iter->second);

815 map_entry->set_item_count(word_id_set.size());

816 for (WordIDSet::const_iterator set_iter = word_id_set.begin();

817 set_iter != word_id_set.end(); ++set_iter)

818 map_entry->add_word_id(*set_iter);

819 }

820 }

821

822 void URLIndexPrivateData::SaveWordIDHistoryMap(

823 InMemoryURLIndexCacheItem* cache) const {

824 if (word_id_history_map_.empty())

825 return;

826 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();

827 map_item->set_item_count(word_id_history_map_.size());

828 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();

829 iter != word_id_history_map_.end(); ++iter) {

830 WordIDHistoryMapEntry* map_entry =

831 map_item->add_word_id_history_map_entry();

832 map_entry->set_word_id(iter->first);

833 const HistoryIDSet& history_id_set(iter->second);

834 map_entry->set_item_count(history_id_set.size());

835 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();

836 set_iter != history_id_set.end(); ++set_iter)

837 map_entry->add_history_id(*set_iter);

838 }

839 }

840

841 void URLIndexPrivateData::SaveHistoryInfoMap(

842 InMemoryURLIndexCacheItem* cache) const {

843 if (history_info_map_.empty())

844 return;

845 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

846 map_item->set_item_count(history_info_map_.size());

847 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

848 iter != history_info_map_.end(); ++iter) {

849 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

850 map_entry->set_history_id(iter->first);

851 const URLRow& url_row(iter->second);

852 // Note: We only save information that contributes to the index so there

853 // is no need to save search_term_cache_ (not persistent).

854 map_entry->set_visit_count(url_row.visit_count());

855 map_entry->set_typed_count(url_row.typed_count());

856 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

857 map_entry->set_url(url_row.url().spec());

858 map_entry->set_title(UTF16ToUTF8(url_row.title()));

859 }

860 }

861

862 void URLIndexPrivateData::SaveWordStartsMap(

863 InMemoryURLIndexCacheItem* cache) const {

864 if (word_starts_map_.empty())

865 return;

866 // For unit testing: Enable saving of the cache as an earlier version to

867 // allow testing of cache file upgrading in ReadFromFile().

868 // TODO(mrossetti): Instead of intruding on production code with this kind of

869 // test harness, save a copy of an older version cache with known results.

870 // Implement this when switching the caching over to SQLite.

871 if (saved_cache_version_ < 1)

872 return;

873

874 WordStartsMapItem* map_item = cache->mutable_word_starts_map();

875 map_item->set_item_count(word_starts_map_.size());

876 for (WordStartsMap::const_iterator iter = word_starts_map_.begin();

877 iter != word_starts_map_.end(); ++iter) {

878 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();

879 map_entry->set_history_id(iter->first);

880 const RowWordStarts& word_starts(iter->second);

881 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();

882 i != word_starts.url_word_starts_.end(); ++i)

883 map_entry->add_url_word_starts(*i);

884 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();

885 i != word_starts.title_word_starts_.end(); ++i)

886 map_entry->add_title_word_starts(*i);

887 }

888 }

889

890 // Cache Restoring -------------------------------------------------------------

891

892 // static

893 void URLIndexPrivateData::RestoreFromFileTask(

894 const FilePath& file_path,

895 scoped_refptr<URLIndexPrivateData> private_data,

896 const std::string& languages) {

897 private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);

898 }

899

900 // static

901 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(

902 const FilePath& file_path,

903 const std::string& languages) {

904 base::TimeTicks beginning_time = base::TimeTicks::Now();

905 if (!file_util::PathExists(file_path))

906 return NULL;

907 std::string data;

908 // If there is no cache file then simply give up. This will cause us to

909 // attempt to rebuild from the history database.

910 if (!file_util::ReadFileToString(file_path, &data))

911 return NULL;

912

913 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);

914 InMemoryURLIndexCacheItem index_cache;

915 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {

916 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "

917 << file_path.value();

918 return restored_data;

919 }

920

921 if (!restored_data->RestorePrivateData(index_cache, languages))

922 return NULL;

923

924 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

925 base::TimeTicks::Now() - beginning_time);

926 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

927 restored_data->history_id_word_map_.size());

928 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

929 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

930 restored_data->word_map_.size());

931 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

932 restored_data->char_word_map_.size());

933 if (restored_data->Empty())

934 return NULL; // 'No data' is the same as a failed reload.

935 return restored_data;

936 }

937

938 // static

939 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

940 HistoryDatabase* history_db,

941 const std::string& languages,

942 const std::set<std::string>& scheme_whitelist) {

943 if (!history_db)

944 return NULL;

945

946 base::TimeTicks beginning_time = base::TimeTicks::Now();

947

948 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);

949 URLDatabase::URLEnumerator history_enum;

950 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

951 return NULL;

952 for (URLRow row; history_enum.GetNextURL(&row); )

953 rebuilt_data->IndexRow(row, languages, scheme_whitelist);

954

955 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

956 base::TimeTicks::Now() - beginning_time);

957 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

958 rebuilt_data->history_id_word_map_.size());

959 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

960 rebuilt_data->word_map_.size());

961 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

962 rebuilt_data->char_word_map_.size());

963 return rebuilt_data;

964 }

965

966 bool URLIndexPrivateData::RestorePrivateData(

967 const InMemoryURLIndexCacheItem& cache,

968 const std::string& languages) {

969 if (cache.has_version())

970 restored_cache_version_ = cache.version();

971 return RestoreWordList(cache) && RestoreWordMap(cache) &&

972 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

973 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);

974 }

975

976 bool URLIndexPrivateData::RestoreWordList(

977 const InMemoryURLIndexCacheItem& cache) {

978 if (!cache.has_word_list())

979 return false;

980 const WordListItem& list_item(cache.word_list());

981 uint32 expected_item_count = list_item.word_count();

982 uint32 actual_item_count = list_item.word_size();

983 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

984 return false;

985 const RepeatedPtrField<std::string>& words(list_item.word());

986 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();

987 iter != words.end(); ++iter)

988 word_list_.push_back(UTF8ToUTF16(*iter));

989 return true;

990 }

991

992 bool URLIndexPrivateData::RestoreWordMap(

993 const InMemoryURLIndexCacheItem& cache) {

994 if (!cache.has_word_map())

995 return false;

996 const WordMapItem& list_item(cache.word_map());

997 uint32 expected_item_count = list_item.item_count();

998 uint32 actual_item_count = list_item.word_map_entry_size();

999 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1000 return false;

1001 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());

1002 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();

1003 iter != entries.end(); ++iter)

1004 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();

1005 return true;

1006 }

1007

1008 bool URLIndexPrivateData::RestoreCharWordMap(

1009 const InMemoryURLIndexCacheItem& cache) {

1010 if (!cache.has_char_word_map())

1011 return false;

1012 const CharWordMapItem& list_item(cache.char_word_map());

1013 uint32 expected_item_count = list_item.item_count();

1014 uint32 actual_item_count = list_item.char_word_map_entry_size();

1015 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1016 return false;

1017 const RepeatedPtrField<CharWordMapEntry>&

1018 entries(list_item.char_word_map_entry());

1019 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =

1020 entries.begin(); iter != entries.end(); ++iter) {

1021 expected_item_count = iter->item_count();

1022 actual_item_count = iter->word_id_size();

1023 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1024 return false;

1025 char16 uni_char = static_cast<char16>(iter->char_16());

1026 WordIDSet word_id_set;

1027 const RepeatedField<int32>& word_ids(iter->word_id());

1028 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();

1029 jiter != word_ids.end(); ++jiter)

1030 word_id_set.insert(*jiter);

1031 char_word_map_[uni_char] = word_id_set;

1032 }

1033 return true;

1034 }

1035

1036 bool URLIndexPrivateData::RestoreWordIDHistoryMap(

1037 const InMemoryURLIndexCacheItem& cache) {

1038 if (!cache.has_word_id_history_map())

1039 return false;

1040 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());

1041 uint32 expected_item_count = list_item.item_count();

1042 uint32 actual_item_count = list_item.word_id_history_map_entry_size();

1043 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1044 return false;

1045 const RepeatedPtrField<WordIDHistoryMapEntry>&

1046 entries(list_item.word_id_history_map_entry());

1047 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =

1048 entries.begin(); iter != entries.end(); ++iter) {

1049 expected_item_count = iter->item_count();

1050 actual_item_count = iter->history_id_size();

1051 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1052 return false;

1053 WordID word_id = iter->word_id();

1054 HistoryIDSet history_id_set;

1055 const RepeatedField<int64>& history_ids(iter->history_id());

1056 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();

1057 jiter != history_ids.end(); ++jiter) {

1058 history_id_set.insert(*jiter);

1059 AddToHistoryIDWordMap(*jiter, word_id);

1060 }

1061 word_id_history_map_[word_id] = history_id_set;

1062 }

1063 return true;

1064 }

1065

1066 bool URLIndexPrivateData::RestoreHistoryInfoMap(

1067 const InMemoryURLIndexCacheItem& cache) {

1068 if (!cache.has_history_info_map())

1069 return false;

1070 const HistoryInfoMapItem& list_item(cache.history_info_map());

1071 uint32 expected_item_count = list_item.item_count();

1072 uint32 actual_item_count = list_item.history_info_map_entry_size();

1073 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1074 return false;

1075 const RepeatedPtrField<HistoryInfoMapEntry>&

1076 entries(list_item.history_info_map_entry());

1077 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =

1078 entries.begin(); iter != entries.end(); ++iter) {

1079 HistoryID history_id = iter->history_id();

1080 GURL url(iter->url());

1081 URLRow url_row(url, history_id);

1082 url_row.set_visit_count(iter->visit_count());

1083 url_row.set_typed_count(iter->typed_count());

1084 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));

1085 if (iter->has_title()) {

1086 string16 title(UTF8ToUTF16(iter->title()));

1087 url_row.set_title(title);

1088 }

1089 history_info_map_[history_id] = url_row;

1090 }

1091 return true;

1092 }

1093

1094 bool URLIndexPrivateData::RestoreWordStartsMap(

1095 const InMemoryURLIndexCacheItem& cache,

1096 const std::string& languages) {

1097 // Note that this function must be called after RestoreHistoryInfoMap() has

1098 // been run as the word starts may have to be recalculated from the urls and

1099 // page titles.

1100 if (cache.has_word_starts_map()) {

1101 const WordStartsMapItem& list_item(cache.word_starts_map());

1102 uint32 expected_item_count = list_item.item_count();

1103 uint32 actual_item_count = list_item.word_starts_map_entry_size();

1104 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1105 return false;

1106 const RepeatedPtrField<WordStartsMapEntry>&

1107 entries(list_item.word_starts_map_entry());

1108 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =

1109 entries.begin(); iter != entries.end(); ++iter) {

1110 HistoryID history_id = iter->history_id();

1111 RowWordStarts word_starts;

1112 // Restore the URL word starts.

1113 const RepeatedField<int32>& url_starts(iter->url_word_starts());

1114 for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();

1115 jiter != url_starts.end(); ++jiter)

1116 word_starts.url_word_starts_.push_back(*jiter);

1117 // Restore the page title word starts.

1118 const RepeatedField<int32>& title_starts(iter->title_word_starts());

1119 for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();

1120 jiter != title_starts.end(); ++jiter)

1121 word_starts.title_word_starts_.push_back(*jiter);

1122 word_starts_map_[history_id] = word_starts;

1123 }

1124 } else {

1125 // Since the cache did not contain any word starts we must rebuild then from

1126 // the URL and page titles.

1127 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

1128 iter != history_info_map_.end(); ++iter) {

1129 RowWordStarts word_starts;

1130 const URLRow& row(iter->second);

1131 string16 url(net::FormatUrl(row.url(), languages,

1132 net::kFormatUrlOmitUsernamePassword,

1133 net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS,

1134 NULL, NULL, NULL));

1135 url = base::i18n::ToLower(url);

1136 String16VectorFromString16(url, false, &word_starts.url_word_starts_);

1137 String16VectorFromString16(

1138 row.title(), false, &word_starts.title_word_starts_);

1139 word_starts_map_[iter->first] = word_starts;

1140 }

1141 }

1142 return true;

1143 }	1083 }

1144	1084

1145 // static	1085 // static

1146 bool URLIndexPrivateData::URLSchemeIsWhitelisted(	1086 bool URLIndexPrivateData::URLSchemeIsWhitelisted(

1147 const GURL& gurl,	1087 const GURL& gurl,

1148 const std::set<std::string>& whitelist) {	1088 const std::set<std::string>& whitelist) {

1149 return whitelist.find(gurl.scheme()) != whitelist.end();	1089 return whitelist.find(gurl.scheme()) != whitelist.end();

1150 }	1090 }

1151	1091

1152 } // namespace history	1092 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | chrome/browser/history/url_index_private_data_unittest.cc » ('j') | no next file with comments »