Chromium Code Reviews| Index: chrome/browser/history/url_index_private_data.cc |
| =================================================================== |
| --- chrome/browser/history/url_index_private_data.cc (revision 125621) |
| +++ chrome/browser/history/url_index_private_data.cc (working copy) |
| @@ -42,6 +42,9 @@ |
| typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; |
| typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry |
| HistoryInfoMapEntry; |
| +typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem; |
| +typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry |
| + WordStartsMapEntry; |
| // The maximum score any candidate result can achieve. |
| const int kMaxTotalScore = 1425; |
| @@ -115,7 +118,9 @@ |
| // InMemoryURLIndex's Private Data --------------------------------------------- |
| URLIndexPrivateData::URLIndexPrivateData() |
| - : pre_filter_item_count_(0), |
| + : restored_cache_version_(0), |
| + saved_cache_version_(kCurrentCacheFileVersion), |
| + pre_filter_item_count_(0), |
| post_filter_item_count_(0), |
| post_scoring_item_count_(0) { |
| URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_); |
| @@ -131,6 +136,7 @@ |
| word_id_history_map_.clear(); |
| history_id_word_map_.clear(); |
| history_info_map_.clear(); |
| + word_starts_map_.clear(); |
| } |
| // Cache Updating -------------------------------------------------------------- |
| @@ -161,11 +167,14 @@ |
| history_info_map_[history_id] = new_row; |
| // Index the words contained in the URL and title of the row. |
| - AddRowWordsToIndex(new_row); |
| + WordStarts word_starts; |
| + AddRowWordsToIndex(new_row, &word_starts); |
| + word_starts_map_[history_id] = word_starts; |
| return true; |
| } |
| -void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) { |
| +void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, |
| + WordStarts* word_starts) { |
| HistoryID history_id = static_cast<HistoryID>(row.id()); |
| // Split URL into individual, unique words then add in the title words. |
| const GURL& gurl(row.url()); |
| @@ -174,8 +183,10 @@ |
| net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
| NULL, NULL, NULL)); |
| url = base::i18n::ToLower(url); |
| - String16Set url_words = String16SetFromString16(url); |
| - String16Set title_words = String16SetFromString16(row.title()); |
| + String16Set url_words = String16SetFromString16(url, |
| + word_starts ? &word_starts->url_word_starts_ : NULL); |
| + String16Set title_words = String16SetFromString16(row.title(), |
| + word_starts ? &word_starts->title_word_starts_ : NULL); |
| String16Set words; |
| std::set_union(url_words.begin(), url_words.end(), |
| title_words.begin(), title_words.end(), |
| @@ -246,6 +257,7 @@ |
| RemoveRowWordsFromIndex(row); |
| HistoryID history_id = static_cast<HistoryID>(row.id()); |
| history_info_map_.erase(history_id); |
| + word_starts_map_.erase(history_id); |
| } |
| void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) { |
| @@ -328,7 +340,9 @@ |
| // URL and title. |
| RemoveRowWordsFromIndex(row_to_update); |
| row_to_update.set_title(row.title()); |
| - AddRowWordsToIndex(row_to_update); |
| + WordStarts word_starts; |
| + AddRowWordsToIndex(row_to_update, &word_starts); |
| + word_starts_map_[row_id] = word_starts; |
| } |
| row_was_updated = true; |
| } |
| @@ -424,7 +438,7 @@ |
| // search string. When the user types "colspec=ID%20Mstone Release" we get |
| // four 'words': "colspec", "id", "mstone" and "release". |
| String16Vector lower_words( |
| - history::String16VectorFromString16(lower_unescaped_string, false)); |
| + history::String16VectorFromString16(lower_unescaped_string, false, NULL)); |
| ScoredHistoryMatches scored_items; |
| // Do nothing if we have indexed no words (probably because we've not been |
| @@ -536,8 +550,12 @@ |
| // deleted by the user or the item no longer qualifies as a quick result. |
| if (hist_pos != private_data_.history_info_map_.end()) { |
| const URLRow& hist_item = hist_pos->second; |
| - ScoredHistoryMatch match( |
| - ScoredMatchForURL(hist_item, lower_string_, lower_terms_)); |
| + WordStartsMap::const_iterator starts_pos = |
| + private_data_.word_starts_map_.find(history_id); |
| + DCHECK(starts_pos != private_data_.word_starts_map_.end()); |
| + ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_string_, |
| + lower_terms_, |
| + starts_pos->second)); |
| if (match.raw_score > 0) |
| scored_matches_.push_back(match); |
| } |
| @@ -548,7 +566,8 @@ |
| ScoredHistoryMatch URLIndexPrivateData::ScoredMatchForURL( |
| const URLRow& row, |
| const string16& lower_string, |
| - const String16Vector& terms) { |
| + const String16Vector& terms, |
| + const WordStarts& word_starts) { |
| ScoredHistoryMatch match(row); |
| GURL gurl = row.url(); |
| if (!gurl.is_valid()) |
| @@ -664,14 +683,13 @@ |
| // Score component for how early in the match string the first search term |
| // appears. Start with kStartMaxValue points and discount by |
| - // kStartMaxValue/kMaxSignificantStart points for each character later than |
| + // kStartMaxValue/kMaxSignificantChars points for each character later than |
| // the first at which the term begins. No points are earned if the start of |
| - // the match occurs at or after kMaxSignificantStart. |
| - const size_t kMaxSignificantStart = 50; |
| + // the match occurs at or after kMaxSignificantChars. |
| const int kStartMaxValue = 1000; |
| - int start_value = (kMaxSignificantStart - |
| - std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / |
| - kMaxSignificantStart; |
| + int start_value = (kMaxSignificantChars - |
| + std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue / |
| + kMaxSignificantChars; |
| // Score component for how much of the matched string the input terms cover. |
| // kCompleteMaxValue points times the fraction of the URL/page title string |
| @@ -928,6 +946,7 @@ |
| InMemoryURLIndexCacheItem* cache) const { |
| DCHECK(cache); |
| cache->set_timestamp(base::Time::Now().ToInternalValue()); |
| + cache->set_version(saved_cache_version_); |
| // history_item_count_ is no longer used but rather than change the protobuf |
| // definition use a placeholder. This will go away with the switch to SQLite. |
| cache->set_history_item_count(0); |
| @@ -936,6 +955,7 @@ |
| SaveCharWordMap(cache); |
| SaveWordIDHistoryMap(cache); |
| SaveHistoryInfoMap(cache); |
| + SaveWordStartsMap(cache); |
| } |
| void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
| @@ -1020,6 +1040,33 @@ |
| } |
| } |
| +void URLIndexPrivateData::SaveWordStartsMap( |
| + InMemoryURLIndexCacheItem* cache) const { |
| + if (word_starts_map_.empty()) |
| + return; |
| + // For unit testing: Enable saving of the cache as an earlier version to |
| + // allow testing of cache file upgrading in ReadFromFile(). |
|
Peter Kasting
2012/03/09 02:37:43
Nit: Is this the best way of doing this? For most
mrossetti
2012/03/14 23:23:49
I hear you, and understand. Since saving using pro
|
| + if (saved_cache_version_ < 1) |
| + return; |
| + |
| + WordStartsMapItem* map_item = cache->mutable_word_starts_map(); |
| + map_item->set_item_count(word_starts_map_.size()); |
| + for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); |
| + iter != word_starts_map_.end(); ++iter) { |
| + WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); |
| + map_entry->set_history_id(iter->first); |
| + const WordStarts& word_starts(iter->second); |
| + for (std::vector<int>::const_iterator siter = |
| + word_starts.url_word_starts_.begin(); |
| + siter != word_starts.url_word_starts_.end(); ++siter) |
| + map_entry->add_url_word_starts(*siter); |
| + for (std::vector<int>::const_iterator siter = |
| + word_starts.title_word_starts_.begin(); |
| + siter != word_starts.title_word_starts_.end(); ++siter) |
| + map_entry->add_title_word_starts(*siter); |
| + } |
| +} |
| + |
| // Cache Restoring ------------------------------------------------------------- |
| bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { |
| @@ -1090,9 +1137,11 @@ |
| bool URLIndexPrivateData::RestorePrivateData( |
| const InMemoryURLIndexCacheItem& cache) { |
| + if (cache.has_version()) |
| + restored_cache_version_ = cache.version(); |
| return RestoreWordList(cache) && RestoreWordMap(cache) && |
| RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
| - RestoreHistoryInfoMap(cache); |
| + RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache); |
| } |
| bool URLIndexPrivateData::RestoreWordList( |
| @@ -1213,4 +1262,54 @@ |
| return true; |
| } |
| +bool URLIndexPrivateData::RestoreWordStartsMap( |
| + const InMemoryURLIndexCacheItem& cache) { |
| + // Note that this function must be called after RestoreHistoryInfoMap() has |
| + // been run as the word starts may have to be recalculated from the urls and |
| + // page titles. |
| + if (cache.has_word_starts_map()) { |
| + const WordStartsMapItem& list_item(cache.word_starts_map()); |
| + uint32 expected_item_count = list_item.item_count(); |
| + uint32 actual_item_count = list_item.word_starts_map_entry_size(); |
| + if (actual_item_count == 0 || actual_item_count != expected_item_count) |
| + return false; |
| + const RepeatedPtrField<WordStartsMapEntry>& |
| + entries(list_item.word_starts_map_entry()); |
| + for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter = |
| + entries.begin(); iter != entries.end(); ++iter) { |
| + HistoryID history_id = iter->history_id(); |
| + WordStarts word_starts; |
| + // Restore the URL word starts. |
| + const RepeatedField<int32>& url_starts(iter->url_word_starts()); |
| + for (RepeatedField<int32>::const_iterator jiter = url_starts.begin(); |
|
Peter Kasting
2012/03/09 02:37:43
Nit: Argh... use |i| and |j| rather than |iter| an
|
| + jiter != url_starts.end(); ++jiter) |
| + word_starts.url_word_starts_.push_back(*jiter); |
| + // Restore the page title word starts. |
| + const RepeatedField<int32>& title_starts(iter->title_word_starts()); |
| + for (RepeatedField<int32>::const_iterator jiter = title_starts.begin(); |
| + jiter != title_starts.end(); ++jiter) |
| + word_starts.title_word_starts_.push_back(*jiter); |
| + word_starts_map_[history_id] = word_starts; |
| + } |
| + } else { |
| + // Since the cache did not contain any word starts we must rebuild then from |
| + // the URL and page titles. |
| + for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
| + iter != history_info_map_.end(); ++iter) { |
| + WordStarts word_starts; |
| + const URLRow& row(iter->second); |
| + string16 url(net::FormatUrl(row.url(), languages_, |
| + net::kFormatUrlOmitUsernamePassword, |
| + net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
| + NULL, NULL, NULL)); |
| + url = base::i18n::ToLower(url); |
| + String16VectorFromString16(url, false, &word_starts.url_word_starts_); |
| + String16VectorFromString16( |
| + row.title(), false, &word_starts.title_word_starts_); |
| + word_starts_map_[iter->first] = word_starts; |
| + } |
| + } |
| + return true; |
| +} |
| + |
| } // namespace history |