chrome/browser/history/url_index_private_data.cc - Issue 963823003: Move InMemoryURLIndex into chrome/browser/autocomplete

Unified Diff: chrome/browser/history/url_index_private_data.cc

Issue 963823003: Move InMemoryURLIndex into chrome/browser/autocomplete (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@shortcut-database

Patch Set: Fixing win_chromium_x64_rel_ng build Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/browser/history/url_index_private_data.cc

diff --git a/chrome/browser/history/url_index_private_data.cc b/chrome/browser/history/url_index_private_data.cc

deleted file mode 100644

index 88ef8e52a2493eb469b0a3f518ac401ff257b15c..0000000000000000000000000000000000000000

--- a/chrome/browser/history/url_index_private_data.cc

+++ /dev/null

@@ -1,1350 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include "chrome/browser/history/url_index_private_data.h"

-#include <functional>

-#include <iterator>

-#include <limits>

-#include <numeric>

-#include <string>

-#include <vector>

-#include "base/basictypes.h"

-#include "base/files/file_util.h"

-#include "base/i18n/break_iterator.h"

-#include "base/i18n/case_conversion.h"

-#include "base/metrics/histogram.h"

-#include "base/strings/string_util.h"

-#include "base/strings/utf_string_conversions.h"

-#include "base/time/time.h"

-#include "chrome/browser/history/history_service.h"

-#include "chrome/browser/history/in_memory_url_index.h"

-#include "components/bookmarks/browser/bookmark_utils.h"

-#include "components/history/core/browser/history_database.h"

-#include "components/history/core/browser/history_db_task.h"

-#include "net/base/net_util.h"

-#if defined(USE_SYSTEM_PROTOBUF)

-#include <google/protobuf/repeated_field.h>

-#else

-#include "third_party/protobuf/src/google/protobuf/repeated_field.h"

-#endif

-using google::protobuf::RepeatedField;

-using google::protobuf::RepeatedPtrField;

-using in_memory_url_index::InMemoryURLIndexCacheItem;

-namespace {

-static const size_t kMaxVisitsToStoreInCache = 10u;

-} // anonymous namespace

-namespace history {

-typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;

-typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;

-typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;

-typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;

-typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry

- CharWordMapEntry;

-typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem

- WordIDHistoryMapItem;

-typedef imui::

- InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry

- WordIDHistoryMapEntry;

-typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;

-typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry

- HistoryInfoMapEntry;

-typedef imui::

- InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo

- HistoryInfoMapEntry_VisitInfo;

-typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;

-typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry

- WordStartsMapEntry;

-// Algorithm Functions ---------------------------------------------------------

-// Comparison function for sorting search terms by descending length.

-bool LengthGreater(const base::string16& string_a,

- const base::string16& string_b) {

- return string_a.length() > string_b.length();

-// UpdateRecentVisitsFromHistoryDBTask -----------------------------------------

-// HistoryDBTask used to update the recent visit data for a particular

-// row from the history database.

-class UpdateRecentVisitsFromHistoryDBTask : public HistoryDBTask {

- public:

- explicit UpdateRecentVisitsFromHistoryDBTask(

- URLIndexPrivateData* private_data,

- URLID url_id);

- bool RunOnDBThread(HistoryBackend* backend,

- history::HistoryDatabase* db) override;

- void DoneRunOnMainThread() override;

- private:

- ~UpdateRecentVisitsFromHistoryDBTask() override;

- // The URLIndexPrivateData that gets updated after the historyDB

- // task returns.

- URLIndexPrivateData* private_data_;

- // The ID of the URL to get visits for and then update.

- URLID url_id_;

- // Whether fetching the recent visits for the URL succeeded.

- bool succeeded_;

- // The awaited data that's shown to private_data_ for it to copy and

- // store.

- VisitVector recent_visits_;

- DISALLOW_COPY_AND_ASSIGN(UpdateRecentVisitsFromHistoryDBTask);

-};

-UpdateRecentVisitsFromHistoryDBTask::UpdateRecentVisitsFromHistoryDBTask(

- URLIndexPrivateData* private_data,

- URLID url_id)

- : private_data_(private_data),

- url_id_(url_id),

- succeeded_(false) {

-bool UpdateRecentVisitsFromHistoryDBTask::RunOnDBThread(

- HistoryBackend* backend,

- HistoryDatabase* db) {

- // Make sure the private data is going to get as many recent visits as

- // ScoredHistoryMatch::GetFrequency() hopes to use.

- DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);

- succeeded_ = db->GetMostRecentVisitsForURL(url_id_,

- kMaxVisitsToStoreInCache,

- &recent_visits_);

- if (!succeeded_)

- recent_visits_.clear();

- return true; // Always claim to be done; do not retry failures.

-void UpdateRecentVisitsFromHistoryDBTask::DoneRunOnMainThread() {

- if (succeeded_)

- private_data_->UpdateRecentVisits(url_id_, recent_visits_);

-UpdateRecentVisitsFromHistoryDBTask::~UpdateRecentVisitsFromHistoryDBTask() {

-// URLIndexPrivateData ---------------------------------------------------------

-URLIndexPrivateData::URLIndexPrivateData()

- : restored_cache_version_(0),

- saved_cache_version_(kCurrentCacheFileVersion),

- pre_filter_item_count_(0),

- post_filter_item_count_(0),

- post_scoring_item_count_(0) {

-ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

- base::string16 search_string,

- size_t cursor_position,

- size_t max_matches,

- const std::string& languages,

- const history::ScoredHistoryMatch::Builder& builder) {

- // If cursor position is set and useful (not at either end of the

- // string), allow the search string to be broken at cursor position.

- // We do this by pretending there's a space where the cursor is.

- if ((cursor_position != base::string16::npos) &&

- (cursor_position < search_string.length()) &&

- (cursor_position > 0)) {

- search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

- }

- pre_filter_item_count_ = 0;

- post_filter_item_count_ = 0;

- post_scoring_item_count_ = 0;

- // The search string we receive may contain escaped characters. For reducing

- // the index we need individual, lower-cased words, ignoring escapings. For

- // the final filtering we need whitespace separated substrings possibly

- // containing escaped characters.

- base::string16 lower_raw_string(base::i18n::ToLower(search_string));

- base::string16 lower_unescaped_string =

- net::UnescapeURLComponent(lower_raw_string,

- net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);

- // Extract individual 'words' (as opposed to 'terms'; see below) from the

- // search string. When the user types "colspec=ID%20Mstone Release" we get

- // four 'words': "colspec", "id", "mstone" and "release".

- String16Vector lower_words(

- history::String16VectorFromString16(lower_unescaped_string, false, NULL));

- ScoredHistoryMatches scored_items;

- // Do nothing if we have indexed no words (probably because we've not been

- // initialized yet) or the search string has no words.

- if (word_list_.empty() || lower_words.empty()) {

- search_term_cache_.clear(); // Invalidate the term cache.

- return scored_items;

- }

- // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

- // approach.

- ResetSearchTermCache();

- HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

- // Trim the candidate pool if it is large. Note that we do not filter out

- // items that do not contain the search terms as proper substrings -- doing

- // so is the performance-costly operation we are trying to avoid in order

- // to maintain omnibox responsiveness.

- const size_t kItemsToScoreLimit = 500;

- pre_filter_item_count_ = history_id_set.size();

- // If we trim the results set we do not want to cache the results for next

- // time as the user's ultimately desired result could easily be eliminated

- // in this early rough filter.

- bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

- if (was_trimmed) {

- HistoryIDVector history_ids;

- std::copy(history_id_set.begin(), history_id_set.end(),

- std::back_inserter(history_ids));

- // Trim down the set by sorting by typed-count, visit-count, and last

- // visit.

- HistoryItemFactorGreater

- item_factor_functor(history_info_map_);

- std::partial_sort(history_ids.begin(),

- history_ids.begin() + kItemsToScoreLimit,

- history_ids.end(),

- item_factor_functor);

- history_id_set.clear();

- std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

- std::inserter(history_id_set, history_id_set.end()));

- post_filter_item_count_ = history_id_set.size();

- }

- // Pass over all of the candidates filtering out any without a proper

- // substring match, inserting those which pass in order by score. Note that

- // in this step we are using the raw search string complete with escaped

- // URL elements. When the user has specifically typed something akin to

- // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

- // specific substring appears in the URL or page title.

- // We call these 'terms' (as opposed to 'words'; see above) as in this case

- // we only want to break up the search string on 'true' whitespace rather than

- // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

- // get two 'terms': "colspec=id%20mstone" and "release".

- history::String16Vector lower_raw_terms;

- if (Tokenize(lower_raw_string, base::kWhitespaceUTF16,

- &lower_raw_terms) == 0) {

- // Don't score matches when there are no terms to score against. (It's

- // possible that the word break iterater that extracts words to search

- // for in the database allows some whitespace "words" whereas Tokenize

- // excludes a long list of whitespace.) One could write a scoring

- // function that gives a reasonable order to matches when there

- // are no terms (i.e., all the words are some form of whitespace),

- // but this is such a rare edge case that it's not worth the time.

- return scored_items;

- }

- scored_items =

- std::for_each(

- history_id_set.begin(), history_id_set.end(),

- AddHistoryMatch(*this, languages, lower_raw_string, lower_raw_terms,

- base::Time::Now(), builder)).ScoredMatches();

- // Select and sort only the top |max_matches| results.

- if (scored_items.size() > max_matches) {

- std::partial_sort(scored_items.begin(),

- scored_items.begin() +

- max_matches,

- scored_items.end(),

- ScoredHistoryMatch::MatchScoreGreater);

- scored_items.resize(max_matches);

- } else {

- std::sort(scored_items.begin(), scored_items.end(),

- ScoredHistoryMatch::MatchScoreGreater);

- }

- post_scoring_item_count_ = scored_items.size();

- if (was_trimmed) {

- search_term_cache_.clear(); // Invalidate the term cache.

- } else {

- // Remove any stale SearchTermCacheItems.

- for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

- cache_iter != search_term_cache_.end(); ) {

- if (!cache_iter->second.used_)

- search_term_cache_.erase(cache_iter++);

- else

- ++cache_iter;

- }

- return scored_items;

-bool URLIndexPrivateData::UpdateURL(

- HistoryService* history_service,

- const URLRow& row,

- const std::string& languages,

- const std::set<std::string>& scheme_whitelist,

- base::CancelableTaskTracker* tracker) {

- // The row may or may not already be in our index. If it is not already

- // indexed and it qualifies then it gets indexed. If it is already

- // indexed and still qualifies then it gets updated, otherwise it

- // is deleted from the index.

- bool row_was_updated = false;

- URLID row_id = row.id();

- HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

- if (row_pos == history_info_map_.end()) {

- // This new row should be indexed if it qualifies.

- URLRow new_row(row);

- new_row.set_id(row_id);

- row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

- IndexRow(NULL,

- history_service,

- new_row,

- languages,

- scheme_whitelist,

- tracker);

- } else if (RowQualifiesAsSignificant(row, base::Time())) {

- // This indexed row still qualifies and will be re-indexed.

- // The url won't have changed but the title, visit count, etc.

- // might have changed.

- URLRow& row_to_update = row_pos->second.url_row;

- bool title_updated = row_to_update.title() != row.title();

- if (row_to_update.visit_count() != row.visit_count() ||

- row_to_update.typed_count() != row.typed_count() ||

- row_to_update.last_visit() != row.last_visit() || title_updated) {

- row_to_update.set_visit_count(row.visit_count());

- row_to_update.set_typed_count(row.typed_count());

- row_to_update.set_last_visit(row.last_visit());

- // If something appears to have changed, update the recent visits

- // information.

- ScheduleUpdateRecentVisits(history_service, row_id, tracker);

- // While the URL is guaranteed to remain stable, the title may have

- // changed. If so, then update the index with the changed words.

- if (title_updated) {

- // Clear all words associated with this row and re-index both the

- // URL and title.

- RemoveRowWordsFromIndex(row_to_update);

- row_to_update.set_title(row.title());

- RowWordStarts word_starts;

- AddRowWordsToIndex(row_to_update, &word_starts, languages);

- word_starts_map_[row_id] = word_starts;

- }

- row_was_updated = true;

- }

- } else {

- // This indexed row no longer qualifies and will be de-indexed by

- // clearing all words associated with this row.

- RemoveRowFromIndex(row);

- row_was_updated = true;

- }

- if (row_was_updated)

- search_term_cache_.clear(); // This invalidates the cache.

- return row_was_updated;

-void URLIndexPrivateData::UpdateRecentVisits(

- URLID url_id,

- const VisitVector& recent_visits) {

- HistoryInfoMap::iterator row_pos = history_info_map_.find(url_id);

- if (row_pos != history_info_map_.end()) {

- VisitInfoVector* visits = &row_pos->second.visits;

- visits->clear();

- const size_t size =

- std::min(recent_visits.size(), kMaxVisitsToStoreInCache);

- visits->reserve(size);

- for (size_t i = 0; i < size; i++) {

- // Copy from the VisitVector the only fields visits needs.

- visits->push_back(std::make_pair(recent_visits[i].visit_time,

- recent_visits[i].transition));

- }

- // Else: Oddly, the URL doesn't seem to exist in the private index.

- // Ignore this update. This can happen if, for instance, the user

- // removes the URL from URLIndexPrivateData before the historyDB call

- // returns.

-void URLIndexPrivateData::ScheduleUpdateRecentVisits(

- HistoryService* history_service,

- URLID url_id,

- base::CancelableTaskTracker* tracker) {

- history_service->ScheduleDBTask(

- scoped_ptr<history::HistoryDBTask>(

- new UpdateRecentVisitsFromHistoryDBTask(this, url_id)), tracker);

-// Helper functor for DeleteURL.

-class HistoryInfoMapItemHasURL {

- public:

- explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

- bool operator()(const std::pair<const HistoryID, HistoryInfoMapValue>& item) {

- return item.second.url_row.url() == url_;

- }

- private:

- const GURL& url_;

-};

-bool URLIndexPrivateData::DeleteURL(const GURL& url) {

- // Find the matching entry in the history_info_map_.

- HistoryInfoMap::iterator pos = std::find_if(

- history_info_map_.begin(),

- history_info_map_.end(),

- HistoryInfoMapItemHasURL(url));

- if (pos == history_info_map_.end())

- return false;

- RemoveRowFromIndex(pos->second.url_row);

- search_term_cache_.clear(); // This invalidates the cache.

- return true;

-// static

-scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(

- const base::FilePath& file_path,

- const std::string& languages) {

- base::TimeTicks beginning_time = base::TimeTicks::Now();

- if (!base::PathExists(file_path))

- return NULL;

- std::string data;

- // If there is no cache file then simply give up. This will cause us to

- // attempt to rebuild from the history database.

- if (!base::ReadFileToString(file_path, &data))

- return NULL;

- scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);

- InMemoryURLIndexCacheItem index_cache;

- if (!index_cache.ParseFromArray(data.c_str(), data.size())) {

- LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "

- << file_path.value();

- return restored_data;

- }

- if (!restored_data->RestorePrivateData(index_cache, languages))

- return NULL;

- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

- base::TimeTicks::Now() - beginning_time);

- UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

- restored_data->history_id_word_map_.size());

- UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

- restored_data->word_map_.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

- restored_data->char_word_map_.size());

- if (restored_data->Empty())

- return NULL; // 'No data' is the same as a failed reload.

- return restored_data;

-// static

-scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

- HistoryDatabase* history_db,

- const std::string& languages,

- const std::set<std::string>& scheme_whitelist) {

- if (!history_db)

- return NULL;

- base::TimeTicks beginning_time = base::TimeTicks::Now();

- scoped_refptr<URLIndexPrivateData>

- rebuilt_data(new URLIndexPrivateData);

- URLDatabase::URLEnumerator history_enum;

- if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

- return NULL;

- rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();

- for (URLRow row; history_enum.GetNextURL(&row); ) {

- rebuilt_data->IndexRow(

- history_db, NULL, row, languages, scheme_whitelist, NULL);

- }

- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

- base::TimeTicks::Now() - beginning_time);

- UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

- rebuilt_data->history_id_word_map_.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

- rebuilt_data->word_map_.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

- rebuilt_data->char_word_map_.size());

- return rebuilt_data;

-// static

-bool URLIndexPrivateData::WritePrivateDataToCacheFileTask(

- scoped_refptr<URLIndexPrivateData> private_data,

- const base::FilePath& file_path) {

- DCHECK(private_data.get());

- DCHECK(!file_path.empty());

- return private_data->SaveToFile(file_path);

-scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {

- scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;

- data_copy->last_time_rebuilt_from_history_ = last_time_rebuilt_from_history_;

- data_copy->word_list_ = word_list_;

- data_copy->available_words_ = available_words_;

- data_copy->word_map_ = word_map_;

- data_copy->char_word_map_ = char_word_map_;

- data_copy->word_id_history_map_ = word_id_history_map_;

- data_copy->history_id_word_map_ = history_id_word_map_;

- data_copy->history_info_map_ = history_info_map_;

- data_copy->word_starts_map_ = word_starts_map_;

- return data_copy;

- // Not copied:

- // search_term_cache_

- // pre_filter_item_count_

- // post_filter_item_count_

- // post_scoring_item_count_

-bool URLIndexPrivateData::Empty() const {

- return history_info_map_.empty();

-void URLIndexPrivateData::Clear() {

- last_time_rebuilt_from_history_ = base::Time();

- word_list_.clear();

- available_words_.clear();

- word_map_.clear();

- char_word_map_.clear();

- word_id_history_map_.clear();

- history_id_word_map_.clear();

- history_info_map_.clear();

- word_starts_map_.clear();

-URLIndexPrivateData::~URLIndexPrivateData() {}

-HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(

- const String16Vector& unsorted_words) {

- // Break the terms down into individual terms (words), get the candidate

- // set for each term, and intersect each to get a final candidate list.

- // Note that a single 'term' from the user's perspective might be

- // a string like "http://www.somewebsite.com" which, from our perspective,

- // is four words: 'http', 'www', 'somewebsite', and 'com'.

- HistoryIDSet history_id_set;

- String16Vector words(unsorted_words);

- // Sort the words into the longest first as such are likely to narrow down

- // the results quicker. Also, single character words are the most expensive

- // to process so save them for last.

- std::sort(words.begin(), words.end(), LengthGreater);

- for (String16Vector::iterator iter = words.begin(); iter != words.end();

- ++iter) {

- base::string16 uni_word = *iter;

- HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);

- if (term_history_set.empty()) {

- history_id_set.clear();

- break;

- }

- if (iter == words.begin()) {

- history_id_set.swap(term_history_set);

- } else {

- HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(

- history_id_set, term_history_set);

- history_id_set.swap(new_history_id_set);

- }

- return history_id_set;

-HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(

- const base::string16& term) {

- if (term.empty())

- return HistoryIDSet();

- // TODO(mrossetti): Consider optimizing for very common terms such as

- // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently

- // occuring words in the user's searches.

- size_t term_length = term.length();

- WordIDSet word_id_set;

- if (term_length > 1) {

- // See if this term or a prefix thereof is present in the cache.

- SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());

- for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

- cache_iter != search_term_cache_.end(); ++cache_iter) {

- if (StartsWith(term, cache_iter->first, false) &&

- (best_prefix == search_term_cache_.end() ||

- cache_iter->first.length() > best_prefix->first.length()))

- best_prefix = cache_iter;

- }

- // If a prefix was found then determine the leftover characters to be used

- // for further refining the results from that prefix.

- Char16Set prefix_chars;

- base::string16 leftovers(term);

- if (best_prefix != search_term_cache_.end()) {

- // If the prefix is an exact match for the term then grab the cached

- // results and we're done.

- size_t prefix_length = best_prefix->first.length();

- if (prefix_length == term_length) {

- best_prefix->second.used_ = true;

- return best_prefix->second.history_id_set_;

- }

- // Otherwise we have a handy starting point.

- // If there are no history results for this prefix then we can bail early

- // as there will be no history results for the full term.

- if (best_prefix->second.history_id_set_.empty()) {

- search_term_cache_[term] = SearchTermCacheItem();

- return HistoryIDSet();

- }

- word_id_set = best_prefix->second.word_id_set_;

- prefix_chars = Char16SetFromString16(best_prefix->first);

- leftovers = term.substr(prefix_length);

- }

- // Filter for each remaining, unique character in the term.

- Char16Set leftover_chars = Char16SetFromString16(leftovers);

- Char16Set unique_chars =

- base::STLSetDifference<Char16Set>(leftover_chars, prefix_chars);

- // Reduce the word set with any leftover, unprocessed characters.

- if (!unique_chars.empty()) {

- WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));

- // We might come up empty on the leftovers.

- if (leftover_set.empty()) {

- search_term_cache_[term] = SearchTermCacheItem();

- return HistoryIDSet();

- }

- // Or there may not have been a prefix from which to start.

- if (prefix_chars.empty()) {

- word_id_set.swap(leftover_set);

- } else {

- WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(

- word_id_set, leftover_set);

- word_id_set.swap(new_word_id_set);

- }

- // We must filter the word list because the resulting word set surely

- // contains words which do not have the search term as a proper subset.

- for (WordIDSet::iterator word_set_iter = word_id_set.begin();

- word_set_iter != word_id_set.end(); ) {

- if (word_list_[*word_set_iter].find(term) == base::string16::npos)

- word_id_set.erase(word_set_iter++);

- else

- ++word_set_iter;

- }

- } else {

- word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));

- }

- // If any words resulted then we can compose a set of history IDs by unioning

- // the sets from each word.

- HistoryIDSet history_id_set;

- if (!word_id_set.empty()) {

- for (WordIDSet::iterator word_id_iter = word_id_set.begin();

- word_id_iter != word_id_set.end(); ++word_id_iter) {

- WordID word_id = *word_id_iter;

- WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);

- if (word_iter != word_id_history_map_.end()) {

- HistoryIDSet& word_history_id_set(word_iter->second);

- history_id_set.insert(word_history_id_set.begin(),

- word_history_id_set.end());

- }

- // Record a new cache entry for this word if the term is longer than

- // a single character.

- if (term_length > 1)

- search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);

- return history_id_set;

-WordIDSet URLIndexPrivateData::WordIDSetForTermChars(

- const Char16Set& term_chars) {

- WordIDSet word_id_set;

- for (Char16Set::const_iterator c_iter = term_chars.begin();

- c_iter != term_chars.end(); ++c_iter) {

- CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);

- if (char_iter == char_word_map_.end()) {

- // A character was not found so there are no matching results: bail.

- word_id_set.clear();

- break;

- }

- WordIDSet& char_word_id_set(char_iter->second);

- // It is possible for there to no longer be any words associated with

- // a particular character. Give up in that case.

- if (char_word_id_set.empty()) {

- word_id_set.clear();

- break;

- }

- if (c_iter == term_chars.begin()) {

- // First character results becomes base set of results.

- word_id_set = char_word_id_set;

- } else {

- // Subsequent character results get intersected in.

- WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(

- word_id_set, char_word_id_set);

- word_id_set.swap(new_word_id_set);

- }

- return word_id_set;

-bool URLIndexPrivateData::IndexRow(

- HistoryDatabase* history_db,

- HistoryService* history_service,

- const URLRow& row,

- const std::string& languages,

- const std::set<std::string>& scheme_whitelist,

- base::CancelableTaskTracker* tracker) {

- const GURL& gurl(row.url());

- // Index only URLs with a whitelisted scheme.

- if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))

- return false;

- URLID row_id = row.id();

- // Strip out username and password before saving and indexing.

- base::string16 url(net::FormatUrl(gurl, languages,

- net::kFormatUrlOmitUsernamePassword,

- net::UnescapeRule::NONE,

- NULL, NULL, NULL));

- HistoryID history_id = static_cast<HistoryID>(row_id);

- DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

- // Add the row for quick lookup in the history info store.

- URLRow new_row(GURL(url), row_id);

- new_row.set_visit_count(row.visit_count());

- new_row.set_typed_count(row.typed_count());

- new_row.set_last_visit(row.last_visit());

- new_row.set_title(row.title());

- history_info_map_[history_id].url_row = new_row;

- // Index the words contained in the URL and title of the row.

- RowWordStarts word_starts;

- AddRowWordsToIndex(new_row, &word_starts, languages);

- word_starts_map_[history_id] = word_starts;

- // Update the recent visits information or schedule the update

- // as appropriate.

- if (history_db) {

- // We'd like to check that we're on the history DB thread.

- // However, unittest code actually calls this on the UI thread.

- // So we don't do any thread checks.

- VisitVector recent_visits;

- // Make sure the private data is going to get as many recent visits as

- // ScoredHistoryMatch::GetFrequency() hopes to use.

- DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);

- if (history_db->GetMostRecentVisitsForURL(row_id,

- kMaxVisitsToStoreInCache,

- &recent_visits))

- UpdateRecentVisits(row_id, recent_visits);

- } else {

- DCHECK(tracker);

- DCHECK(history_service);

- ScheduleUpdateRecentVisits(history_service, row_id, tracker);

- }

- return true;

-void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,

- RowWordStarts* word_starts,

- const std::string& languages) {

- HistoryID history_id = static_cast<HistoryID>(row.id());

- // Split URL into individual, unique words then add in the title words.

- const GURL& gurl(row.url());

- const base::string16& url =

- bookmarks::CleanUpUrlForMatching(gurl, languages, NULL);

- String16Set url_words = String16SetFromString16(url,

- word_starts ? &word_starts->url_word_starts_ : NULL);

- const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());

- String16Set title_words = String16SetFromString16(title,

- word_starts ? &word_starts->title_word_starts_ : NULL);

- String16Set words = base::STLSetUnion<String16Set>(url_words, title_words);

- for (String16Set::iterator word_iter = words.begin();

- word_iter != words.end(); ++word_iter)

- AddWordToIndex(*word_iter, history_id);

- search_term_cache_.clear(); // Invalidate the term cache.

-void URLIndexPrivateData::AddWordToIndex(const base::string16& term,

- HistoryID history_id) {

- WordMap::iterator word_pos = word_map_.find(term);

- if (word_pos != word_map_.end())

- UpdateWordHistory(word_pos->second, history_id);

- else

- AddWordHistory(term, history_id);

-void URLIndexPrivateData::AddWordHistory(const base::string16& term,

- HistoryID history_id) {

- WordID word_id = word_list_.size();

- if (available_words_.empty()) {

- word_list_.push_back(term);

- } else {

- word_id = *(available_words_.begin());

- word_list_[word_id] = term;

- available_words_.erase(word_id);

- }

- word_map_[term] = word_id;

- HistoryIDSet history_id_set;

- history_id_set.insert(history_id);

- word_id_history_map_[word_id] = history_id_set;

- AddToHistoryIDWordMap(history_id, word_id);

- // For each character in the newly added word (i.e. a word that is not

- // already in the word index), add the word to the character index.

- Char16Set characters = Char16SetFromString16(term);

- for (Char16Set::iterator uni_char_iter = characters.begin();

- uni_char_iter != characters.end(); ++uni_char_iter) {

- base::char16 uni_char = *uni_char_iter;

- CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);

- if (char_iter != char_word_map_.end()) {

- // Update existing entry in the char/word index.

- WordIDSet& word_id_set(char_iter->second);

- word_id_set.insert(word_id);

- } else {

- // Create a new entry in the char/word index.

- WordIDSet word_id_set;

- word_id_set.insert(word_id);

- char_word_map_[uni_char] = word_id_set;

- }

-void URLIndexPrivateData::UpdateWordHistory(WordID word_id,

- HistoryID history_id) {

- WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);

- DCHECK(history_pos != word_id_history_map_.end());

- HistoryIDSet& history_id_set(history_pos->second);

- history_id_set.insert(history_id);

- AddToHistoryIDWordMap(history_id, word_id);

-void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,

- WordID word_id) {

- HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);

- if (iter != history_id_word_map_.end()) {

- WordIDSet& word_id_set(iter->second);

- word_id_set.insert(word_id);

- } else {

- WordIDSet word_id_set;

- word_id_set.insert(word_id);

- history_id_word_map_[history_id] = word_id_set;

- }

-void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {

- RemoveRowWordsFromIndex(row);

- HistoryID history_id = static_cast<HistoryID>(row.id());

- history_info_map_.erase(history_id);

- word_starts_map_.erase(history_id);

-void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {

- // Remove the entries in history_id_word_map_ and word_id_history_map_ for

- // this row.

- HistoryID history_id = static_cast<HistoryID>(row.id());

- WordIDSet word_id_set = history_id_word_map_[history_id];

- history_id_word_map_.erase(history_id);

- // Reconcile any changes to word usage.

- for (WordIDSet::iterator word_id_iter = word_id_set.begin();

- word_id_iter != word_id_set.end(); ++word_id_iter) {

- WordID word_id = *word_id_iter;

- word_id_history_map_[word_id].erase(history_id);

- if (!word_id_history_map_[word_id].empty())

- continue; // The word is still in use.

- // The word is no longer in use. Reconcile any changes to character usage.

- base::string16 word = word_list_[word_id];

- Char16Set characters = Char16SetFromString16(word);

- for (Char16Set::iterator uni_char_iter = characters.begin();

- uni_char_iter != characters.end(); ++uni_char_iter) {

- base::char16 uni_char = *uni_char_iter;

- char_word_map_[uni_char].erase(word_id);

- if (char_word_map_[uni_char].empty())

- char_word_map_.erase(uni_char); // No longer in use.

- }

- // Complete the removal of references to the word.

- word_id_history_map_.erase(word_id);

- word_map_.erase(word);

- word_list_[word_id] = base::string16();

- available_words_.insert(word_id);

- }

-void URLIndexPrivateData::ResetSearchTermCache() {

- for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();

- iter != search_term_cache_.end(); ++iter)

- iter->second.used_ = false;

-bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {

- base::TimeTicks beginning_time = base::TimeTicks::Now();

- InMemoryURLIndexCacheItem index_cache;

- SavePrivateData(&index_cache);

- std::string data;

- if (!index_cache.SerializeToString(&data)) {

- LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";

- return false;

- }

- int size = data.size();

- if (base::WriteFile(file_path, data.c_str(), size) != size) {

- LOG(WARNING) << "Failed to write " << file_path.value();

- return false;

- }

- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",

- base::TimeTicks::Now() - beginning_time);

- return true;

-void URLIndexPrivateData::SavePrivateData(

- InMemoryURLIndexCacheItem* cache) const {

- DCHECK(cache);

- cache->set_last_rebuild_timestamp(

- last_time_rebuilt_from_history_.ToInternalValue());

- cache->set_version(saved_cache_version_);

- // history_item_count_ is no longer used but rather than change the protobuf

- // definition use a placeholder. This will go away with the switch to SQLite.

- cache->set_history_item_count(0);

- SaveWordList(cache);

- SaveWordMap(cache);

- SaveCharWordMap(cache);

- SaveWordIDHistoryMap(cache);

- SaveHistoryInfoMap(cache);

- SaveWordStartsMap(cache);

-void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

- if (word_list_.empty())

- return;

- WordListItem* list_item = cache->mutable_word_list();

- list_item->set_word_count(word_list_.size());

- for (String16Vector::const_iterator iter = word_list_.begin();

- iter != word_list_.end(); ++iter)

- list_item->add_word(base::UTF16ToUTF8(*iter));

-void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {

- if (word_map_.empty())

- return;

- WordMapItem* map_item = cache->mutable_word_map();

- map_item->set_item_count(word_map_.size());

- for (WordMap::const_iterator iter = word_map_.begin();

- iter != word_map_.end(); ++iter) {

- WordMapEntry* map_entry = map_item->add_word_map_entry();

- map_entry->set_word(base::UTF16ToUTF8(iter->first));

- map_entry->set_word_id(iter->second);

- }

-void URLIndexPrivateData::SaveCharWordMap(

- InMemoryURLIndexCacheItem* cache) const {

- if (char_word_map_.empty())

- return;

- CharWordMapItem* map_item = cache->mutable_char_word_map();

- map_item->set_item_count(char_word_map_.size());

- for (CharWordIDMap::const_iterator iter = char_word_map_.begin();

- iter != char_word_map_.end(); ++iter) {

- CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();

- map_entry->set_char_16(iter->first);

- const WordIDSet& word_id_set(iter->second);

- map_entry->set_item_count(word_id_set.size());

- for (WordIDSet::const_iterator set_iter = word_id_set.begin();

- set_iter != word_id_set.end(); ++set_iter)

- map_entry->add_word_id(*set_iter);

- }

-void URLIndexPrivateData::SaveWordIDHistoryMap(

- InMemoryURLIndexCacheItem* cache) const {

- if (word_id_history_map_.empty())

- return;

- WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();

- map_item->set_item_count(word_id_history_map_.size());

- for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();

- iter != word_id_history_map_.end(); ++iter) {

- WordIDHistoryMapEntry* map_entry =

- map_item->add_word_id_history_map_entry();

- map_entry->set_word_id(iter->first);

- const HistoryIDSet& history_id_set(iter->second);

- map_entry->set_item_count(history_id_set.size());

- for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();

- set_iter != history_id_set.end(); ++set_iter)

- map_entry->add_history_id(*set_iter);

- }

-void URLIndexPrivateData::SaveHistoryInfoMap(

- InMemoryURLIndexCacheItem* cache) const {

- if (history_info_map_.empty())

- return;

- HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

- map_item->set_item_count(history_info_map_.size());

- for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

- iter != history_info_map_.end(); ++iter) {

- HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

- map_entry->set_history_id(iter->first);

- const URLRow& url_row(iter->second.url_row);

- // Note: We only save information that contributes to the index so there

- // is no need to save search_term_cache_ (not persistent).

- map_entry->set_visit_count(url_row.visit_count());

- map_entry->set_typed_count(url_row.typed_count());

- map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

- map_entry->set_url(url_row.url().spec());

- map_entry->set_title(base::UTF16ToUTF8(url_row.title()));

- const VisitInfoVector& visits(iter->second.visits);

- for (VisitInfoVector::const_iterator visit_iter = visits.begin();

- visit_iter != visits.end(); ++visit_iter) {

- HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();

- visit_info->set_visit_time(visit_iter->first.ToInternalValue());

- visit_info->set_transition_type(visit_iter->second);

- }

-void URLIndexPrivateData::SaveWordStartsMap(

- InMemoryURLIndexCacheItem* cache) const {

- if (word_starts_map_.empty())

- return;

- // For unit testing: Enable saving of the cache as an earlier version to

- // allow testing of cache file upgrading in ReadFromFile().

- // TODO(mrossetti): Instead of intruding on production code with this kind of

- // test harness, save a copy of an older version cache with known results.

- // Implement this when switching the caching over to SQLite.

- if (saved_cache_version_ < 1)

- return;

- WordStartsMapItem* map_item = cache->mutable_word_starts_map();

- map_item->set_item_count(word_starts_map_.size());

- for (WordStartsMap::const_iterator iter = word_starts_map_.begin();

- iter != word_starts_map_.end(); ++iter) {

- WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();

- map_entry->set_history_id(iter->first);

- const RowWordStarts& word_starts(iter->second);

- for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();

- i != word_starts.url_word_starts_.end(); ++i)

- map_entry->add_url_word_starts(*i);

- for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();

- i != word_starts.title_word_starts_.end(); ++i)

- map_entry->add_title_word_starts(*i);

- }

-bool URLIndexPrivateData::RestorePrivateData(

- const InMemoryURLIndexCacheItem& cache,

- const std::string& languages) {

- last_time_rebuilt_from_history_ =

- base::Time::FromInternalValue(cache.last_rebuild_timestamp());

- const base::TimeDelta rebuilt_ago =

- base::Time::Now() - last_time_rebuilt_from_history_;

- if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||

- (rebuilt_ago < base::TimeDelta::FromDays(-1))) {

- // Cache is more than a week old or, somehow, from some time in the future.

- // It's probably a good time to rebuild the index from history to

- // allow synced entries to now appear, expired entries to disappear, etc.

- // Allow one day in the future to make the cache not rebuild on simple

- // system clock changes such as time zone changes.

- return false;

- }

- if (cache.has_version()) {

- if (cache.version() < kCurrentCacheFileVersion) {

- // Don't try to restore an old format cache file. (This will cause

- // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData

- // from history.)

- return false;

- }

- restored_cache_version_ = cache.version();

- }

- return RestoreWordList(cache) && RestoreWordMap(cache) &&

- RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

- RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);

-bool URLIndexPrivateData::RestoreWordList(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_word_list())

- return false;

- const WordListItem& list_item(cache.word_list());

- uint32 expected_item_count = list_item.word_count();

- uint32 actual_item_count = list_item.word_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<std::string>& words(list_item.word());

- for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();

- iter != words.end(); ++iter)

- word_list_.push_back(base::UTF8ToUTF16(*iter));

- return true;

-bool URLIndexPrivateData::RestoreWordMap(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_word_map())

- return false;

- const WordMapItem& list_item(cache.word_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.word_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());

- for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();

- iter != entries.end(); ++iter)

- word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id();

- return true;

-bool URLIndexPrivateData::RestoreCharWordMap(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_char_word_map())

- return false;

- const CharWordMapItem& list_item(cache.char_word_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.char_word_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<CharWordMapEntry>&

- entries(list_item.char_word_map_entry());

- for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =

- entries.begin(); iter != entries.end(); ++iter) {

- expected_item_count = iter->item_count();

- actual_item_count = iter->word_id_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- base::char16 uni_char = static_cast<base::char16>(iter->char_16());

- WordIDSet word_id_set;

- const RepeatedField<int32>& word_ids(iter->word_id());

- for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();

- jiter != word_ids.end(); ++jiter)

- word_id_set.insert(*jiter);

- char_word_map_[uni_char] = word_id_set;

- }

- return true;

-bool URLIndexPrivateData::RestoreWordIDHistoryMap(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_word_id_history_map())

- return false;

- const WordIDHistoryMapItem& list_item(cache.word_id_history_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.word_id_history_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<WordIDHistoryMapEntry>&

- entries(list_item.word_id_history_map_entry());

- for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =

- entries.begin(); iter != entries.end(); ++iter) {

- expected_item_count = iter->item_count();

- actual_item_count = iter->history_id_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- WordID word_id = iter->word_id();

- HistoryIDSet history_id_set;

- const RepeatedField<int64>& history_ids(iter->history_id());

- for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();

- jiter != history_ids.end(); ++jiter) {

- history_id_set.insert(*jiter);

- AddToHistoryIDWordMap(*jiter, word_id);

- }

- word_id_history_map_[word_id] = history_id_set;

- }

- return true;

-bool URLIndexPrivateData::RestoreHistoryInfoMap(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_history_info_map())

- return false;

- const HistoryInfoMapItem& list_item(cache.history_info_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.history_info_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<HistoryInfoMapEntry>&

- entries(list_item.history_info_map_entry());

- for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =

- entries.begin(); iter != entries.end(); ++iter) {

- HistoryID history_id = iter->history_id();

- GURL url(iter->url());

- URLRow url_row(url, history_id);

- url_row.set_visit_count(iter->visit_count());

- url_row.set_typed_count(iter->typed_count());

- url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));

- if (iter->has_title()) {

- base::string16 title(base::UTF8ToUTF16(iter->title()));

- url_row.set_title(title);

- }

- history_info_map_[history_id].url_row = url_row;

- // Restore visits list.

- VisitInfoVector visits;

- visits.reserve(iter->visits_size());

- for (int i = 0; i < iter->visits_size(); ++i) {

- visits.push_back(std::make_pair(

- base::Time::FromInternalValue(iter->visits(i).visit_time()),

- ui::PageTransitionFromInt(iter->visits(i).transition_type())));

- }

- history_info_map_[history_id].visits = visits;

- }

- return true;

-bool URLIndexPrivateData::RestoreWordStartsMap(

- const InMemoryURLIndexCacheItem& cache,

- const std::string& languages) {

- // Note that this function must be called after RestoreHistoryInfoMap() has

- // been run as the word starts may have to be recalculated from the urls and

- // page titles.

- if (cache.has_word_starts_map()) {

- const WordStartsMapItem& list_item(cache.word_starts_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.word_starts_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<WordStartsMapEntry>&

- entries(list_item.word_starts_map_entry());

- for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =

- entries.begin(); iter != entries.end(); ++iter) {

- HistoryID history_id = iter->history_id();

- RowWordStarts word_starts;

- // Restore the URL word starts.

- const RepeatedField<int32>& url_starts(iter->url_word_starts());

- for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();

- jiter != url_starts.end(); ++jiter)

- word_starts.url_word_starts_.push_back(*jiter);

- // Restore the page title word starts.

- const RepeatedField<int32>& title_starts(iter->title_word_starts());

- for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();

- jiter != title_starts.end(); ++jiter)

- word_starts.title_word_starts_.push_back(*jiter);

- word_starts_map_[history_id] = word_starts;

- }

- } else {

- // Since the cache did not contain any word starts we must rebuild then from

- // the URL and page titles.

- for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

- iter != history_info_map_.end(); ++iter) {

- RowWordStarts word_starts;

- const URLRow& row(iter->second.url_row);

- const base::string16& url =

- bookmarks::CleanUpUrlForMatching(row.url(), languages, NULL);

- String16VectorFromString16(url, false, &word_starts.url_word_starts_);

- const base::string16& title =

- bookmarks::CleanUpTitleForMatching(row.title());

- String16VectorFromString16(title, false, &word_starts.title_word_starts_);

- word_starts_map_[iter->first] = word_starts;

- }

- return true;

-// static

-bool URLIndexPrivateData::URLSchemeIsWhitelisted(

- const GURL& gurl,

- const std::set<std::string>& whitelist) {

- return whitelist.find(gurl.scheme()) != whitelist.end();

-// SearchTermCacheItem ---------------------------------------------------------

-URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(

- const WordIDSet& word_id_set,

- const HistoryIDSet& history_id_set)

- : word_id_set_(word_id_set),

- history_id_set_(history_id_set),

- used_(true) {}

-URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem()

- : used_(true) {}

-URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}

-// URLIndexPrivateData::AddHistoryMatch ----------------------------------------

-URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

- const URLIndexPrivateData& private_data,

- const std::string& languages,

- const base::string16& lower_string,

- const String16Vector& lower_terms,

- const base::Time now,

- const ScoredHistoryMatch::Builder& builder)

- : private_data_(private_data),

- languages_(languages),

- builder_(builder),

- lower_string_(lower_string),

- lower_terms_(lower_terms),

- now_(now) {

- // Calculate offsets for each term. For instance, the offset for

- // ".net" should be 1, indicating that the actual word-part of the term

- // starts at offset 1.

- lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);

- for (size_t i = 0; i < lower_terms_.size(); ++i) {

- base::i18n::BreakIterator iter(lower_terms_[i],

- base::i18n::BreakIterator::BREAK_WORD);

- // If the iterator doesn't work, assume an offset of 0.

- if (!iter.Init())

- continue;

- // Find the first word start.

- while (iter.Advance() && !iter.IsWord()) {}

- if (iter.IsWord())

- lower_terms_to_word_starts_offsets_[i] = iter.prev();

- // Else: the iterator didn't find a word break. Assume an offset of 0.

- }

-URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}

-void URLIndexPrivateData::AddHistoryMatch::operator()(

- const HistoryID history_id) {

- HistoryInfoMap::const_iterator hist_pos =

- private_data_.history_info_map_.find(history_id);

- if (hist_pos != private_data_.history_info_map_.end()) {

- const URLRow& hist_item = hist_pos->second.url_row;

- const VisitInfoVector& visits = hist_pos->second.visits;

- WordStartsMap::const_iterator starts_pos =

- private_data_.word_starts_map_.find(history_id);

- DCHECK(starts_pos != private_data_.word_starts_map_.end());

- ScoredHistoryMatch match = builder_.Build(

- hist_item, visits, languages_, lower_string_, lower_terms_,

- lower_terms_to_word_starts_offsets_, starts_pos->second, now_);

- if (match.raw_score > 0)

- scored_matches_.push_back(match);

- }

-// URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

-URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

- const HistoryInfoMap& history_info_map)

- : history_info_map_(history_info_map) {

-URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}

-bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

- const HistoryID h1,

- const HistoryID h2) {

- HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

- if (entry1 == history_info_map_.end())

- return false;

- HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

- if (entry2 == history_info_map_.end())

- return true;

- const URLRow& r1(entry1->second.url_row);

- const URLRow& r2(entry2->second.url_row);

- // First cut: typed count, visit count, recency.

- // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

- // recently visited (within the last 12/24 hours) as highly important. Get

- // input from mpearson.

- if (r1.typed_count() != r2.typed_count())

- return (r1.typed_count() > r2.typed_count());

- if (r1.visit_count() != r2.visit_count())

- return (r1.visit_count() > r2.visit_count());

- return (r1.last_visit() > r2.last_visit());

-} // namespace history

« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | chrome/browser/ui/BUILD.gn » ('j') | no next file with comments »