| Index: chrome/browser/history/url_index_private_data.cc
|
| ===================================================================
|
| --- chrome/browser/history/url_index_private_data.cc (revision 125621)
|
| +++ chrome/browser/history/url_index_private_data.cc (working copy)
|
| @@ -42,6 +42,9 @@
|
| typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;
|
| typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
|
| HistoryInfoMapEntry;
|
| +typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;
|
| +typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry
|
| + WordStartsMapEntry;
|
|
|
| // The maximum score any candidate result can achieve.
|
| const int kMaxTotalScore = 1425;
|
| @@ -115,7 +118,9 @@
|
| // InMemoryURLIndex's Private Data ---------------------------------------------
|
|
|
| URLIndexPrivateData::URLIndexPrivateData()
|
| - : pre_filter_item_count_(0),
|
| + : restored_cache_version_(0),
|
| + saved_cache_version_(kCurrentCacheFileVersion),
|
| + pre_filter_item_count_(0),
|
| post_filter_item_count_(0),
|
| post_scoring_item_count_(0) {
|
| URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);
|
| @@ -131,6 +136,7 @@
|
| word_id_history_map_.clear();
|
| history_id_word_map_.clear();
|
| history_info_map_.clear();
|
| + word_starts_map_.clear();
|
| }
|
|
|
| // Cache Updating --------------------------------------------------------------
|
| @@ -161,11 +167,14 @@
|
| history_info_map_[history_id] = new_row;
|
|
|
| // Index the words contained in the URL and title of the row.
|
| - AddRowWordsToIndex(new_row);
|
| + RowWordStarts word_starts;
|
| + AddRowWordsToIndex(new_row, &word_starts);
|
| + word_starts_map_[history_id] = word_starts;
|
| return true;
|
| }
|
|
|
| -void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) {
|
| +void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,
|
| + RowWordStarts* word_starts) {
|
| HistoryID history_id = static_cast<HistoryID>(row.id());
|
| // Split URL into individual, unique words then add in the title words.
|
| const GURL& gurl(row.url());
|
| @@ -174,8 +183,10 @@
|
| net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
|
| NULL, NULL, NULL));
|
| url = base::i18n::ToLower(url);
|
| - String16Set url_words = String16SetFromString16(url);
|
| - String16Set title_words = String16SetFromString16(row.title());
|
| + String16Set url_words = String16SetFromString16(url,
|
| + word_starts ? &word_starts->url_word_starts_ : NULL);
|
| + String16Set title_words = String16SetFromString16(row.title(),
|
| + word_starts ? &word_starts->title_word_starts_ : NULL);
|
| String16Set words;
|
| std::set_union(url_words.begin(), url_words.end(),
|
| title_words.begin(), title_words.end(),
|
| @@ -246,6 +257,7 @@
|
| RemoveRowWordsFromIndex(row);
|
| HistoryID history_id = static_cast<HistoryID>(row.id());
|
| history_info_map_.erase(history_id);
|
| + word_starts_map_.erase(history_id);
|
| }
|
|
|
| void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {
|
| @@ -328,7 +340,9 @@
|
| // URL and title.
|
| RemoveRowWordsFromIndex(row_to_update);
|
| row_to_update.set_title(row.title());
|
| - AddRowWordsToIndex(row_to_update);
|
| + RowWordStarts word_starts;
|
| + AddRowWordsToIndex(row_to_update, &word_starts);
|
| + word_starts_map_[row_id] = word_starts;
|
| }
|
| row_was_updated = true;
|
| }
|
| @@ -424,7 +438,7 @@
|
| // search string. When the user types "colspec=ID%20Mstone Release" we get
|
| // four 'words': "colspec", "id", "mstone" and "release".
|
| String16Vector lower_words(
|
| - history::String16VectorFromString16(lower_unescaped_string, false));
|
| + history::String16VectorFromString16(lower_unescaped_string, false, NULL));
|
| ScoredHistoryMatches scored_items;
|
|
|
| // Do nothing if we have indexed no words (probably because we've not been
|
| @@ -536,8 +550,12 @@
|
| // deleted by the user or the item no longer qualifies as a quick result.
|
| if (hist_pos != private_data_.history_info_map_.end()) {
|
| const URLRow& hist_item = hist_pos->second;
|
| - ScoredHistoryMatch match(
|
| - ScoredMatchForURL(hist_item, lower_string_, lower_terms_));
|
| + WordStartsMap::const_iterator starts_pos =
|
| + private_data_.word_starts_map_.find(history_id);
|
| + DCHECK(starts_pos != private_data_.word_starts_map_.end());
|
| + ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_string_,
|
| + lower_terms_,
|
| + starts_pos->second));
|
| if (match.raw_score > 0)
|
| scored_matches_.push_back(match);
|
| }
|
| @@ -548,7 +566,8 @@
|
| ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL(
|
| const URLRow& row,
|
| const string16& lower_string,
|
| - const String16Vector& terms) {
|
| + const String16Vector& terms,
|
| + const RowWordStarts& word_starts) {
|
| ScoredHistoryMatch match(row);
|
| GURL gurl = row.url();
|
| if (!gurl.is_valid())
|
| @@ -664,14 +683,13 @@
|
|
|
| // Score component for how early in the match string the first search term
|
| // appears. Start with kStartMaxValue points and discount by
|
| - // kStartMaxValue/kMaxSignificantStart points for each character later than
|
| + // kStartMaxValue/kMaxSignificantChars points for each character later than
|
| // the first at which the term begins. No points are earned if the start of
|
| - // the match occurs at or after kMaxSignificantStart.
|
| - const size_t kMaxSignificantStart = 50;
|
| + // the match occurs at or after kMaxSignificantChars.
|
| const int kStartMaxValue = 1000;
|
| - int start_value = (kMaxSignificantStart -
|
| - std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /
|
| - kMaxSignificantStart;
|
| + int start_value = (kMaxSignificantChars -
|
| + std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue /
|
| + kMaxSignificantChars;
|
|
|
| // Score component for how much of the matched string the input terms cover.
|
| // kCompleteMaxValue points times the fraction of the URL/page title string
|
| @@ -928,6 +946,7 @@
|
| InMemoryURLIndexCacheItem* cache) const {
|
| DCHECK(cache);
|
| cache->set_timestamp(base::Time::Now().ToInternalValue());
|
| + cache->set_version(saved_cache_version_);
|
| // history_item_count_ is no longer used but rather than change the protobuf
|
| // definition use a placeholder. This will go away with the switch to SQLite.
|
| cache->set_history_item_count(0);
|
| @@ -936,6 +955,7 @@
|
| SaveCharWordMap(cache);
|
| SaveWordIDHistoryMap(cache);
|
| SaveHistoryInfoMap(cache);
|
| + SaveWordStartsMap(cache);
|
| }
|
|
|
| void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
|
| @@ -1020,6 +1040,34 @@
|
| }
|
| }
|
|
|
| +void URLIndexPrivateData::SaveWordStartsMap(
|
| + InMemoryURLIndexCacheItem* cache) const {
|
| + if (word_starts_map_.empty())
|
| + return;
|
| + // For unit testing: Enable saving of the cache as an earlier version to
|
| + // allow testing of cache file upgrading in ReadFromFile().
|
| + // TODO(mrossetti): Instead of intruding on production code with this kind of
|
| + // test harness, save a copy of an older version cache with known results.
|
| + // Implement this when switching the caching over to SQLite.
|
| + if (saved_cache_version_ < 1)
|
| + return;
|
| +
|
| + WordStartsMapItem* map_item = cache->mutable_word_starts_map();
|
| + map_item->set_item_count(word_starts_map_.size());
|
| + for (WordStartsMap::const_iterator iter = word_starts_map_.begin();
|
| + iter != word_starts_map_.end(); ++iter) {
|
| + WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
|
| + map_entry->set_history_id(iter->first);
|
| + const RowWordStarts& word_starts(iter->second);
|
| + for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();
|
| + i != word_starts.url_word_starts_.end(); ++i)
|
| + map_entry->add_url_word_starts(*i);
|
| + for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();
|
| + i != word_starts.title_word_starts_.end(); ++i)
|
| + map_entry->add_title_word_starts(*i);
|
| + }
|
| +}
|
| +
|
| // Cache Restoring -------------------------------------------------------------
|
|
|
| bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {
|
| @@ -1090,9 +1138,11 @@
|
|
|
| bool URLIndexPrivateData::RestorePrivateData(
|
| const InMemoryURLIndexCacheItem& cache) {
|
| + if (cache.has_version())
|
| + restored_cache_version_ = cache.version();
|
| return RestoreWordList(cache) && RestoreWordMap(cache) &&
|
| RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
|
| - RestoreHistoryInfoMap(cache);
|
| + RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);
|
| }
|
|
|
| bool URLIndexPrivateData::RestoreWordList(
|
| @@ -1213,4 +1263,54 @@
|
| return true;
|
| }
|
|
|
| +bool URLIndexPrivateData::RestoreWordStartsMap(
|
| + const InMemoryURLIndexCacheItem& cache) {
|
| + // Note that this function must be called after RestoreHistoryInfoMap() has
|
| + // been run as the word starts may have to be recalculated from the urls and
|
| + // page titles.
|
| + if (cache.has_word_starts_map()) {
|
| + const WordStartsMapItem& list_item(cache.word_starts_map());
|
| + uint32 expected_item_count = list_item.item_count();
|
| + uint32 actual_item_count = list_item.word_starts_map_entry_size();
|
| + if (actual_item_count == 0 || actual_item_count != expected_item_count)
|
| + return false;
|
| + const RepeatedPtrField<WordStartsMapEntry>&
|
| + entries(list_item.word_starts_map_entry());
|
| + for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
|
| + entries.begin(); iter != entries.end(); ++iter) {
|
| + HistoryID history_id = iter->history_id();
|
| + RowWordStarts word_starts;
|
| + // Restore the URL word starts.
|
| + const RepeatedField<int32>& url_starts(iter->url_word_starts());
|
| + for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();
|
| + jiter != url_starts.end(); ++jiter)
|
| + word_starts.url_word_starts_.push_back(*jiter);
|
| + // Restore the page title word starts.
|
| + const RepeatedField<int32>& title_starts(iter->title_word_starts());
|
| + for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();
|
| + jiter != title_starts.end(); ++jiter)
|
| + word_starts.title_word_starts_.push_back(*jiter);
|
| + word_starts_map_[history_id] = word_starts;
|
| + }
|
| + } else {
|
| + // Since the cache did not contain any word starts we must rebuild them from
|
| + // the URL and page titles.
|
| + for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
|
| + iter != history_info_map_.end(); ++iter) {
|
| + RowWordStarts word_starts;
|
| + const URLRow& row(iter->second);
|
| + string16 url(net::FormatUrl(row.url(), languages_,
|
| + net::kFormatUrlOmitUsernamePassword,
|
| + net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
|
| + NULL, NULL, NULL));
|
| + url = base::i18n::ToLower(url);
|
| + String16VectorFromString16(url, false, &word_starts.url_word_starts_);
|
| + String16VectorFromString16(
|
| + row.title(), false, &word_starts.title_word_starts_);
|
| + word_starts_map_[iter->first] = word_starts;
|
| + }
|
| + }
|
| + return true;
|
| +}
|
| +
|
| } // namespace history
|
|
|