chrome/browser/history/url_index_private_data.cc - Issue 10837244: Replace HistoryQuickProvider protobuf-based caching with an SQLite-based database.

Unified Diff: chrome/browser/history/url_index_private_data.cc

Issue 10837244: Replace HistoryQuickProvider protobuf-based caching with an SQLite-based database. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Tweak suppression. Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/browser/history/url_index_private_data.cc

===================================================================

--- chrome/browser/history/url_index_private_data.cc (revision 151481)

+++ chrome/browser/history/url_index_private_data.cc (working copy)

@@ -14,45 +14,539 @@

#include "base/basictypes.h"

#include "base/file_util.h"

#include "base/i18n/case_conversion.h"

-#include "base/metrics/histogram.h"

#include "base/string_util.h"

-#include "base/time.h"

#include "base/utf_string_conversions.h"

#include "chrome/browser/autocomplete/autocomplete_provider.h"

-#include "chrome/browser/autocomplete/url_prefix.h"

#include "chrome/browser/history/history_database.h"

-#include "chrome/browser/history/in_memory_url_index.h"

+#include "chrome/browser/history/in_memory_url_cache_database.h"

+#include "chrome/common/chrome_constants.h"

+#include "chrome/common/url_constants.h"

#include "content/public/browser/browser_thread.h"

#include "content/public/browser/notification_details.h"

#include "content/public/browser/notification_service.h"

#include "content/public/browser/notification_source.h"

#include "net/base/net_util.h"

-#include "third_party/protobuf/src/google/protobuf/repeated_field.h"

-using google::protobuf::RepeatedField;

-using google::protobuf::RepeatedPtrField;

-using in_memory_url_index::InMemoryURLIndexCacheItem;

+using content::BrowserThread;

namespace history {

-typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;

-typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;

-typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;

-typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;

-typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry

- CharWordMapEntry;

-typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem

- WordIDHistoryMapItem;

-typedef imui::

- InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry

- WordIDHistoryMapEntry;

-typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;

-typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry

- HistoryInfoMapEntry;

-typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem WordStartsMapItem;

-typedef imui::InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry

- WordStartsMapEntry;

+// Comparison function for sorting search terms by descending length.

+bool LengthGreater(const string16& string_a, const string16& string_b) {

+ return string_a.length() > string_b.length();

+// Initializes a whitelist of URL schemes.

+void InitializeSchemeWhitelist(std::set<std::string>* whitelist) {

+ DCHECK(whitelist);

+ if (!whitelist->empty())

+ return; // Nothing to do, already initialized.

+ whitelist->insert(std::string(chrome::kAboutScheme));

+ whitelist->insert(std::string(chrome::kChromeUIScheme));

+ whitelist->insert(std::string(chrome::kFileScheme));

+ whitelist->insert(std::string(chrome::kFtpScheme));

+ whitelist->insert(std::string(chrome::kHttpScheme));

+ whitelist->insert(std::string(chrome::kHttpsScheme));

+ whitelist->insert(std::string(chrome::kMailToScheme));

+// CacheTransaction ------------------------------------------------------------

+// Simple automatic helper class encapsulating calls to BeginTransaction/

+// CommitTransaction.

+class CacheTransaction {

+ public:

+ explicit CacheTransaction(InMemoryURLCacheDatabase* db);

+ ~CacheTransaction();

+ private:

+ InMemoryURLCacheDatabase* db_;

+ DISALLOW_COPY_AND_ASSIGN(CacheTransaction);

+};

+CacheTransaction::CacheTransaction(InMemoryURLCacheDatabase* db)

+ : db_(db) {

+ if (db_)

+ db_->BeginTransaction();

+CacheTransaction::~CacheTransaction() {

+ if (db_)

+ db_->CommitTransaction();

+// URLIndexPrivateData Public (but only to InMemoryURLIndex) Functions ---------

+URLIndexPrivateData::URLIndexPrivateData(const FilePath& history_dir,

+ const std::string& languages)

+ : history_dir_(history_dir),

+ languages_(languages),

+ cache_enabled_(true),

+ shutdown_(false),

+ lock_(),

+ pre_filter_item_count_(0),

+ post_filter_item_count_(0),

+ post_scoring_item_count_(0) {

+ InitializeSchemeWhitelist(&scheme_whitelist_);

+bool URLIndexPrivateData::Init(

+ base::SequencedWorkerPool::SequenceToken sequence_token) {

+ // Since init occurs on the DB thread, it is possible that the profile was

+ // told to shutdown before this task had a chance to kick off.

+ base::AutoLock lock(lock_);

+ if (shutdown_)

+ return false;

+ cache_db_ = new InMemoryURLCacheDatabase();

+ FilePath dir_path;

+ set_cache_enabled(GetCacheDBPath(&dir_path) &&

+ cache_db_->Init(dir_path, sequence_token));

+ return cache_enabled();

+void URLIndexPrivateData::Reset() {

+ Clear();

+ if (cache_enabled())

+ cache_db_->Reset();

+bool URLIndexPrivateData::Empty() const {

+ return history_info_map_.empty();

+URLIndexPrivateData* URLIndexPrivateData::Snapshot() const {

+ return new URLIndexPrivateData(*this);

+void URLIndexPrivateData::Shutdown() {

+ base::AutoLock lock(lock_);

+ shutdown_ = true;

+ if (cache_enabled())

+ cache_db_->Shutdown();

+// Helper predicates for ValidateConsistency.

+template <typename Pair>

+struct SelectFirst : std::unary_function<Pair, typename Pair::first_type> {

+ const typename Pair::first_type& operator()(const Pair& x) const {

+ return x.first;

+ }

+};

+template <typename Pair>

+struct SelectSecond : std::unary_function<Pair, typename Pair::second_type> {

+ const typename Pair::second_type& operator()(const Pair& x) const {

+ return x.second;

+ }

+};

+template <typename SourcePair, typename Target>

+struct TargetInserter : std::unary_function<SourcePair, void> {

+ explicit TargetInserter(Target* target) : target_(target) {}

+ void operator()(const SourcePair& source) {

+ target_->insert(source.second.begin(), source.second.end());

+ }

+ Target* target_;

+};

+bool URLIndexPrivateData::ValidateConsistency() const {

+ // Scope things so that a large data set doesn't unnecessarily accumulate and

+ // hang around.

+ {

+ // Make a set of WordIDs from word_map_.

+ WordIDSet word_id_set_a;

+ std::transform(word_map_.begin(), word_map_.end(),

+ std::inserter(word_id_set_a, word_id_set_a.begin()),

+ SelectSecond<WordMap::value_type>());

+ // Compare word_map_'s word set to the words from word_id_history_map_.

+ {

+ WordIDSet word_id_set_b;

+ std::transform(word_id_history_map_.begin(), word_id_history_map_.end(),

+ std::inserter(word_id_set_b, word_id_set_b.begin()),

+ SelectFirst<WordIDHistoryMap::value_type>());

+ WordIDSet leftovers;

+ std::set_symmetric_difference(

+ word_id_set_a.begin(), word_id_set_a.end(),

+ word_id_set_b.begin(), word_id_set_b.end(),

+ std::inserter(leftovers, leftovers.begin()));

+ if (!leftovers.empty())

+ return false;

+ }

+ // Compare word_map_'s word set to the words from history_id_word_map_.

+ {

+ WordIDSet word_id_set_b;

+ std::for_each(history_id_word_map_.begin(), history_id_word_map_.end(),

+ TargetInserter<HistoryIDWordMap::value_type,

+ WordIDSet>(&word_id_set_b));

+ WordIDSet leftovers;

+ std::set_symmetric_difference(

+ word_id_set_a.begin(), word_id_set_a.end(),

+ word_id_set_b.begin(), word_id_set_b.end(),

+ std::inserter(leftovers, leftovers.begin()));

+ if (!leftovers.empty())

+ return false;

+ }

+ // Compare word_map_'s word set to the words from char_word_map_.

+ {

+ WordIDSet word_id_set_b;

+ std::for_each(char_word_map_.begin(), char_word_map_.end(),

+ TargetInserter<CharWordIDMap::value_type,

+ WordIDSet>(&word_id_set_b));

+ WordIDSet leftovers;

+ std::set_symmetric_difference(

+ word_id_set_a.begin(), word_id_set_a.end(),

+ word_id_set_b.begin(), word_id_set_b.end(),

+ std::inserter(leftovers, leftovers.begin()));

+ if (!leftovers.empty())

+ return false;

+ }

+ // Intersect available_words_ with set of WordIDs (created above from

+ // word_map_) and test for no common WordIDs.

+ {

+ WordIDSet leftovers;

+ std::set_intersection(word_id_set_a.begin(), word_id_set_a.end(),

+ available_words_.begin(), available_words_.end(),

+ std::inserter(leftovers, leftovers.begin()));

+ if (!leftovers.empty())

+ return false;

+ }

+ {

+ // Make a set of HistoryIDs from history_info_map_.

+ HistoryIDSet history_id_set_a;

+ std::transform(history_info_map_.begin(), history_info_map_.end(),

+ std::inserter(history_id_set_a, history_id_set_a.begin()),

+ SelectFirst<HistoryInfoMap::value_type>());

+ // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.

+ {

+ HistoryIDSet history_id_set_b;

+ std::transform(history_info_map_.begin(), history_info_map_.end(),

+ std::inserter(history_id_set_b, history_id_set_b.begin()),

+ SelectFirst<HistoryInfoMap::value_type>());

+ HistoryIDSet leftovers;

+ std::set_symmetric_difference(

+ history_id_set_a.begin(), history_id_set_a.end(),

+ history_id_set_b.begin(), history_id_set_b.end(),

+ std::inserter(leftovers, leftovers.begin()));

+ if (!leftovers.empty())

+ return false;

+ }

+ // Compare history_info_map_'s set to word_id_history_map_'s HistoryIDs.

+ {

+ HistoryIDSet history_id_set_b;

+ std::for_each(word_id_history_map_.begin(), word_id_history_map_.end(),

+ TargetInserter<WordIDHistoryMap::value_type,

+ HistoryIDSet>(&history_id_set_b));

+ HistoryIDSet leftovers;

+ std::set_symmetric_difference(

+ history_id_set_a.begin(), history_id_set_a.end(),

+ history_id_set_b.begin(), history_id_set_b.end(),

+ std::inserter(leftovers, leftovers.begin()));

+ if (!leftovers.empty())

+ return false;

+ }

+ // Compare history_info_map_'s set to word_starts_map_'s HistoryIDs.

+ {

+ HistoryIDSet history_id_set_b;

+ std::transform(word_starts_map_.begin(), word_starts_map_.end(),

+ std::inserter(history_id_set_b, history_id_set_b.begin()),

+ SelectFirst<WordStartsMap::value_type>());

+ HistoryIDSet leftovers;

+ std::set_symmetric_difference(

+ history_id_set_a.begin(), history_id_set_a.end(),

+ history_id_set_b.begin(), history_id_set_b.end(),

+ std::inserter(leftovers, leftovers.begin()));

+ if (!leftovers.empty())

+ return false;

+ }

+ // Make sets of words from word_list_ and word_map_ and test for equality.

+ {

+ String16Set word_list_set;

+ std::copy(word_list_.begin(), word_list_.end(),

+ std::inserter(word_list_set, word_list_set.end()));

+ String16Set word_map_set;

+ std::transform(word_map_.begin(), word_map_.end(),

+ std::inserter(word_map_set, word_map_set.begin()),

+ SelectFirst<WordMap::value_type>());

+ if (word_list_set != word_map_set)

+ return false;

+ }

+ return true;

+ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

+ const string16& search_string) {

+ pre_filter_item_count_ = 0;

+ post_filter_item_count_ = 0;

+ post_scoring_item_count_ = 0;

+ // The search string we receive may contain escaped characters. For reducing

+ // the index we need individual, lower-cased words, ignoring escapings. For

+ // the final filtering we need whitespace separated substrings possibly

+ // containing escaped characters.

+ string16 lower_raw_string(base::i18n::ToLower(search_string));

+ string16 lower_unescaped_string =

+ net::UnescapeURLComponent(lower_raw_string,

+ net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);

+ // Extract individual 'words' (as opposed to 'terms'; see below) from the

+ // search string. When the user types "colspec=ID%20Mstone Release" we get

+ // four 'words': "colspec", "id", "mstone" and "release".

+ String16Vector lower_words(

+ history::String16VectorFromString16(lower_unescaped_string, false, NULL));

+ ScoredHistoryMatches scored_items;

+ // Do nothing if we have indexed no words (probably because we've not been

+ // initialized yet) or the search string has no words.

+ if (word_list_.empty() || lower_words.empty()) {

+ search_term_cache_.clear(); // Invalidate the term cache.

+ return scored_items;

+ }

+ // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

+ // approach.

+ ResetSearchTermCache();

+ HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

+ // Trim the candidate pool if it is large. Note that we do not filter out

+ // items that do not contain the search terms as proper substrings -- doing

+ // so is the performance-costly operation we are trying to avoid in order

+ // to maintain omnibox responsiveness.

+ const size_t kItemsToScoreLimit = 500;

+ pre_filter_item_count_ = history_id_set.size();

+ // If we trim the results set we do not want to cache the results for next

+ // time as the user's ultimately desired result could easily be eliminated

+ // in this early rough filter.

+ bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

+ if (was_trimmed) {

+ HistoryIDVector history_ids;

+ std::copy(history_id_set.begin(), history_id_set.end(),

+ std::back_inserter(history_ids));

+ // Trim down the set by sorting by typed-count, visit-count, and last

+ // visit.

+ HistoryItemFactorGreater

+ item_factor_functor(history_info_map_);

+ std::partial_sort(history_ids.begin(),

+ history_ids.begin() + kItemsToScoreLimit,

+ history_ids.end(),

+ item_factor_functor);

+ history_id_set.clear();

+ std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

+ std::inserter(history_id_set, history_id_set.end()));

+ post_filter_item_count_ = history_id_set.size();

+ }

+ // Pass over all of the candidates filtering out any without a proper

+ // substring match, inserting those which pass in order by score. Note that

+ // in this step we are using the raw search string complete with escaped

+ // URL elements. When the user has specifically typed something akin to

+ // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

+ // specific substring appears in the URL or page title.

+ // We call these 'terms' (as opposed to 'words'; see above) as in this case

+ // we only want to break up the search string on 'true' whitespace rather than

+ // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

+ // get two 'terms': "colspec=id%20mstone" and "release".

+ history::String16Vector lower_raw_terms;

+ Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);

+ scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),

+ AddHistoryMatch(*this, lower_raw_string,

+ lower_raw_terms, base::Time::Now())).ScoredMatches();

+ // Select and sort only the top kMaxMatches results.

+ if (scored_items.size() > AutocompleteProvider::kMaxMatches) {

+ std::partial_sort(scored_items.begin(),

+ scored_items.begin() +

+ AutocompleteProvider::kMaxMatches,

+ scored_items.end(),

+ ScoredHistoryMatch::MatchScoreGreater);

+ scored_items.resize(AutocompleteProvider::kMaxMatches);

+ } else {

+ std::sort(scored_items.begin(), scored_items.end(),

+ ScoredHistoryMatch::MatchScoreGreater);

+ }

+ post_scoring_item_count_ = scored_items.size();

+ if (was_trimmed) {

+ search_term_cache_.clear(); // Invalidate the term cache.

+ } else {

+ // Remove any stale SearchTermCacheItems.

+ for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

+ cache_iter != search_term_cache_.end(); ) {

+ if (!cache_iter->second.used_)

+ search_term_cache_.erase(cache_iter++);

+ else

+ ++cache_iter;

+ }

+ return scored_items;

+bool URLIndexPrivateData::UpdateURL(const URLRow& row) {

+ // The row may or may not already be in our index. If it is not already

+ // indexed and it qualifies then it gets indexed. If it is already

+ // indexed and still qualifies then it gets updated, otherwise it

+ // is deleted from the index.

+ bool row_was_updated = false;

+ URLID row_id = row.id();

+ HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

+ if (row_pos == history_info_map_.end()) {

+ // This new row should be indexed if it qualifies.

+ URLRow new_row(row);

+ new_row.set_id(row_id);

+ row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

+ IndexRow(new_row);

+ } else if (RowQualifiesAsSignificant(row, base::Time())) {

+ // This indexed row still qualifies and will be re-indexed.

+ // The url won't have changed but the title, visit count, etc.

+ // might have changed.

+ URLRow& row_to_update = row_pos->second;

+ bool title_updated = row_to_update.title() != row.title();

+ if (row_to_update.visit_count() != row.visit_count() ||

+ row_to_update.typed_count() != row.typed_count() ||

+ row_to_update.last_visit() != row.last_visit() || title_updated) {

+ CacheTransaction cache_transaction(cache_enabled() ?

+ cache_db_.get() : NULL);

+ row_to_update.set_visit_count(row.visit_count());

+ row_to_update.set_typed_count(row.typed_count());

+ row_to_update.set_last_visit(row.last_visit());

+ row_to_update.set_hidden(row.hidden());

+ // While the URL is guaranteed to remain stable, the title may have

+ // changed. If so, then update the index with the changed words.

+ if (title_updated) {

+ // Clear all words associated with this row and re-index both the

+ // URL and title.

+ RemoveRowWordsFromIndex(row_to_update);

+ row_to_update.set_title(row.title());

+ RowWordStarts word_starts;

+ AddRowWordsToIndex(row_to_update, &word_starts);

+ word_starts_map_[row_id] = word_starts;

+ // Replace all word_starts for the row.

+ if (cache_enabled()) {

+ cache_db_->RemoveWordStarts(row_id);

+ cache_db_->AddRowWordStarts(row_id, word_starts);

+ }

+ if (cache_enabled()) {

+ cache_db_->RemoveHistoryIDFromURLs(row_id);

+ cache_db_->AddHistoryToURLs(row_id, row);

+ }

+ row_was_updated = true;

+ }

+ } else {

+ // This indexed row no longer qualifies and will be de-indexed by

+ // clearing all words associated with this row.

+ RemoveRowFromIndex(row);

+ row_was_updated = true;

+ }

+ if (row_was_updated)

+ search_term_cache_.clear(); // This invalidates the cache.

+ return row_was_updated;

+// Helper functor for DeleteURL.

+class HistoryInfoMapItemHasURL {

+ public:

+ explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

+ bool operator()(const std::pair<const HistoryID, URLRow>& item) {

+ return item.second.url() == url_;

+ }

+ private:

+ const GURL& url_;

+};

+bool URLIndexPrivateData::DeleteURL(const GURL& url) {

+ DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||

+ BrowserThread::CurrentlyOn(BrowserThread::UI));

+ // Find the matching entry in the history_info_map_.

+ HistoryInfoMap::iterator pos = std::find_if(

+ history_info_map_.begin(),

+ history_info_map_.end(),

+ HistoryInfoMapItemHasURL(url));

+ if (pos == history_info_map_.end())

+ return false;

+ RemoveRowFromIndex(pos->second);

+ search_term_cache_.clear(); // This invalidates the cache.

+ return true;

+bool URLIndexPrivateData::RestoreFromCacheTask() {

+ DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||

+ !BrowserThread::CurrentlyOn(BrowserThread::UI));

+ DCHECK(cache_db_);

+ if (IsShutdown())

+ return false;

+ Clear();

+ if (RestoreFromCache(cache_db_))

+ return true;

+ // If there was no SQLite-based cache then there might be an old,

+ // deprecated, protobuf-based cache file laying around. Get rid of it.

+ DeleteOldVersionCacheFile();

+ return false;

+// static

+scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

+ HistoryDatabase* history_db,

+ scoped_refptr<URLIndexPrivateData> old_data) {

+ DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||

+ !BrowserThread::CurrentlyOn(BrowserThread::UI));

+ if (!history_db)

+ return NULL;

+ scoped_refptr<URLIndexPrivateData> rebuilt_data(

+ new URLIndexPrivateData(*(old_data.get())));

+ // NOTE: We disable the cache database until after the replacement private

+ // data has been created and then we re-enable the database. Once the private

+ // data has been swapped in the database is refreshed with the new data.

+ URLDatabase::URLEnumerator history_enum;

+ if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

+ return NULL;

+ for (URLRow row; !old_data->IsShutdown() && history_enum.GetNextURL(&row); )

+ rebuilt_data->IndexRow(row);

+ if (old_data->IsShutdown()) {

+ rebuilt_data.release();

+ }

+ return rebuilt_data;

+void URLIndexPrivateData::RefreshCacheTask() {

+ DCHECK(!BrowserThread::IsWellKnownThread(BrowserThread::UI) ||

+ !BrowserThread::CurrentlyOn(BrowserThread::UI));

+ base::AutoLock lock(lock_);

+ if (shutdown_ || !cache_enabled())

+ return;

+ // Should a refresh fail for any reason we consider the database to be

+ // corrupt and unavailable; no further remedial action is possible.

+ set_cache_enabled(cache_db_->Reset() && cache_db_->Refresh(*this));

+// static

+void URLIndexPrivateData::InitializeSchemeWhitelistForTesting(

+ std::set<std::string>* whitelist) {

+ InitializeSchemeWhitelist(whitelist);

// SearchTermCacheItem ---------------------------------------------------------

URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(

@@ -67,25 +561,100 @@

URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {}

-// Algorithm Functions ---------------------------------------------------------

+// URLIndexPrivateData::AddHistoryMatch ----------------------------------------

-// Comparison function for sorting search terms by descending length.

-bool LengthGreater(const string16& string_a, const string16& string_b) {

- return string_a.length() > string_b.length();

+URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

+ const URLIndexPrivateData& private_data,

+ const string16& lower_string,

+ const String16Vector& lower_terms,

+ const base::Time now)

+ : private_data_(private_data),

+ lower_string_(lower_string),

+ lower_terms_(lower_terms),

+ now_(now) {}

+URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}

+void URLIndexPrivateData::AddHistoryMatch::operator()(

+ const HistoryID history_id) {

+ HistoryInfoMap::const_iterator hist_pos =

+ private_data_.history_info_map_.find(history_id);

+ if (hist_pos != private_data_.history_info_map_.end()) {

+ const URLRow& hist_item = hist_pos->second;

+ WordStartsMap::const_iterator starts_pos =

+ private_data_.word_starts_map_.find(history_id);

+ DCHECK(starts_pos != private_data_.word_starts_map_.end());

+ ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,

+ starts_pos->second, now_);

+ if (match.raw_score > 0)

+ scored_matches_.push_back(match);

+ }

}

-// InMemoryURLIndex's Private Data ---------------------------------------------

+// URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

+URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

+ const HistoryInfoMap& history_info_map)

+ : history_info_map_(history_info_map) {

+URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}

+bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

+ const HistoryID h1,

+ const HistoryID h2) {

+ HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

+ if (entry1 == history_info_map_.end())

+ return false;

+ HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

+ if (entry2 == history_info_map_.end())

+ return true;

+ const URLRow& r1(entry1->second);

+ const URLRow& r2(entry2->second);

+ // First cut: typed count, visit count, recency.

+ // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

+ // recently visited (within the last 12/24 hours) as highly important. Get

+ // input from mpearson.

+ if (r1.typed_count() != r2.typed_count())

+ return (r1.typed_count() > r2.typed_count());

+ if (r1.visit_count() != r2.visit_count())

+ return (r1.visit_count() > r2.visit_count());

+ return (r1.last_visit() > r2.last_visit());

+// URLIndexPrivateData Private Functions ---------------------------------------

+URLIndexPrivateData::URLIndexPrivateData(const URLIndexPrivateData& old_data)

+ : history_dir_(old_data.history_dir_),

+ languages_(old_data.languages_),

+ cache_enabled_(old_data.cache_enabled_),

+ shutdown_(old_data.IsShutdown()),

+ lock_(),

+ scheme_whitelist_(old_data.scheme_whitelist_),

+ pre_filter_item_count_(0),

+ post_filter_item_count_(0),

+ post_scoring_item_count_(0) {

+ cache_db_ = old_data.cache_db_;

+// Called only by unit tests.

URLIndexPrivateData::URLIndexPrivateData()

- : restored_cache_version_(0),

- saved_cache_version_(kCurrentCacheFileVersion),

+ : cache_enabled_(true),

+ shutdown_(false),

+ lock_(),

pre_filter_item_count_(0),

post_filter_item_count_(0),

post_scoring_item_count_(0) {

+ InitializeSchemeWhitelist(&scheme_whitelist_);

}

URLIndexPrivateData::~URLIndexPrivateData() {}

+bool URLIndexPrivateData::IsShutdown() const {

+ base::AutoLock lock(lock_);

+ return shutdown_;

sky 2012/08/22 19:42:34 nit: spacing.

void URLIndexPrivateData::Clear() {

word_list_.clear();

available_words_.clear();

@@ -97,48 +666,24 @@

word_starts_map_.clear();

}

-bool URLIndexPrivateData::Empty() const {

- return history_info_map_.empty();

-scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {

- scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;

- data_copy->word_list_ = word_list_;

- data_copy->available_words_ = available_words_;

- data_copy->word_map_ = word_map_;

- data_copy->char_word_map_ = char_word_map_;

- data_copy->word_id_history_map_ = word_id_history_map_;

- data_copy->history_id_word_map_ = history_id_word_map_;

- data_copy->history_info_map_ = history_info_map_;

- data_copy->word_starts_map_ = word_starts_map_;

- return data_copy;

- // Not copied:

- // search_term_cache_

- // pre_filter_item_count_

- // post_filter_item_count_

- // post_scoring_item_count_

-};

// Cache Updating --------------------------------------------------------------

-bool URLIndexPrivateData::IndexRow(

- const URLRow& row,

- const std::string& languages,

- const std::set<std::string>& scheme_whitelist) {

+bool URLIndexPrivateData::IndexRow(const URLRow& row) {

const GURL& gurl(row.url());

// Index only URLs with a whitelisted scheme.

- if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))

+ if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist_))

return false;

- URLID row_id = row.id();

// Strip out username and password before saving and indexing.

- string16 url(net::FormatUrl(gurl, languages,

+ string16 url(net::FormatUrl(gurl, languages_,

net::kFormatUrlOmitUsernamePassword,

net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,

NULL, NULL, NULL));

+ URLID row_id = row.id();

HistoryID history_id = static_cast<HistoryID>(row_id);

+ DCHECK(history_info_map_.find(history_id) == history_info_map_.end());

DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

// Add the row for quick lookup in the history info store.

@@ -147,22 +692,27 @@

new_row.set_typed_count(row.typed_count());

new_row.set_last_visit(row.last_visit());

new_row.set_title(row.title());

+ CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);

history_info_map_[history_id] = new_row;

+ if (cache_enabled())

+ cache_db_->AddHistoryToURLs(history_id, row);

// Index the words contained in the URL and title of the row.

RowWordStarts word_starts;

- AddRowWordsToIndex(new_row, &word_starts, languages);

+ AddRowWordsToIndex(new_row, &word_starts);

word_starts_map_[history_id] = word_starts;

+ if (cache_enabled())

+ cache_db_->AddRowWordStarts(history_id, word_starts);

return true;

}

void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,

- RowWordStarts* word_starts,

- const std::string& languages) {

+ RowWordStarts* word_starts) {

HistoryID history_id = static_cast<HistoryID>(row.id());

// Split URL into individual, unique words then add in the title words.

const GURL& gurl(row.url());

- string16 url(net::FormatUrl(gurl, languages,

+ string16 url(net::FormatUrl(gurl, languages_,

net::kFormatUrlOmitUsernamePassword,

net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,

NULL, NULL, NULL));

@@ -195,9 +745,15 @@

HistoryID history_id) {

WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);

DCHECK(history_pos != word_id_history_map_.end());

+ // There is no need to record changes to the word_id_history_map_ in the cache

+ // because it can be rebuilt from the history_id_word_map_'s table.

HistoryIDSet& history_id_set(history_pos->second);

history_id_set.insert(history_id);

AddToHistoryIDWordMap(history_id, word_id);

+ // Add word_id/history_id entry to the word_history table

+ if (cache_enabled())

+ cache_db_->AddHistoryToWordHistory(word_id, history_id);

}

void URLIndexPrivateData::AddWordHistory(const string16& term,

@@ -212,11 +768,19 @@

}

word_map_[term] = word_id;

+ // Add word_id/term entry to the words table;

+ if (cache_enabled())

+ cache_db_->AddWordToWords(word_id, term);

HistoryIDSet history_id_set;

history_id_set.insert(history_id);

word_id_history_map_[word_id] = history_id_set;

AddToHistoryIDWordMap(history_id, word_id);

+ // Add word_id/history_id entry to the word_history table

+ if (cache_enabled())

+ cache_db_->AddHistoryToWordHistory(word_id, history_id);

// For each character in the newly added word (i.e. a word that is not

// already in the word index), add the word to the character index.

Char16Set characters = Char16SetFromString16(term);

@@ -234,14 +798,20 @@

word_id_set.insert(word_id);

char_word_map_[uni_char] = word_id_set;

}

+ // Add uni_char/word_id entry to the char_words database table.

+ if (cache_enabled())

+ cache_db_->AddWordToCharWords(uni_char, word_id);

}

void URLIndexPrivateData::RemoveRowFromIndex(const URLRow& row) {

+ CacheTransaction cache_transaction(cache_enabled() ? cache_db_.get() : NULL);

RemoveRowWordsFromIndex(row);

HistoryID history_id = static_cast<HistoryID>(row.id());

history_info_map_.erase(history_id);

word_starts_map_.erase(history_id);

+ if (cache_enabled())

+ cache_db_->RemoveHistoryIDFromURLs(history_id);

}

void URLIndexPrivateData::RemoveRowWordsFromIndex(const URLRow& row) {

@@ -275,7 +845,11 @@

word_map_.erase(word);

word_list_[word_id] = string16();

available_words_.insert(word_id);

+ if (cache_enabled())

+ cache_db_->RemoveWordFromWords(word_id);

}

+ if (cache_enabled())

+ cache_db_->RemoveHistoryIDFromWordHistory(history_id);

}

void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,

@@ -291,257 +865,6 @@

}

-bool URLIndexPrivateData::UpdateURL(

- const URLRow& row,

- const std::string& languages,

- const std::set<std::string>& scheme_whitelist) {

- // The row may or may not already be in our index. If it is not already

- // indexed and it qualifies then it gets indexed. If it is already

- // indexed and still qualifies then it gets updated, otherwise it

- // is deleted from the index.

- bool row_was_updated = false;

- URLID row_id = row.id();

- HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

- if (row_pos == history_info_map_.end()) {

- // This new row should be indexed if it qualifies.

- URLRow new_row(row);

- new_row.set_id(row_id);

- row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

- IndexRow(new_row, languages, scheme_whitelist);

- } else if (RowQualifiesAsSignificant(row, base::Time())) {

- // This indexed row still qualifies and will be re-indexed.

- // The url won't have changed but the title, visit count, etc.

- // might have changed.

- URLRow& row_to_update = row_pos->second;

- bool title_updated = row_to_update.title() != row.title();

- if (row_to_update.visit_count() != row.visit_count() ||

- row_to_update.typed_count() != row.typed_count() ||

- row_to_update.last_visit() != row.last_visit() || title_updated) {

- row_to_update.set_visit_count(row.visit_count());

- row_to_update.set_typed_count(row.typed_count());

- row_to_update.set_last_visit(row.last_visit());

- // While the URL is guaranteed to remain stable, the title may have

- // changed. If so, then update the index with the changed words.

- if (title_updated) {

- // Clear all words associated with this row and re-index both the

- // URL and title.

- RemoveRowWordsFromIndex(row_to_update);

- row_to_update.set_title(row.title());

- RowWordStarts word_starts;

- AddRowWordsToIndex(row_to_update, &word_starts, languages);

- word_starts_map_[row_id] = word_starts;

- }

- row_was_updated = true;

- }

- } else {

- // This indexed row no longer qualifies and will be de-indexed by

- // clearing all words associated with this row.

- RemoveRowFromIndex(row);

- row_was_updated = true;

- }

- if (row_was_updated)

- search_term_cache_.clear(); // This invalidates the cache.

- return row_was_updated;

-// Helper functor for DeleteURL.

-class HistoryInfoMapItemHasURL {

- public:

- explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

- bool operator()(const std::pair<const HistoryID, URLRow>& item) {

- return item.second.url() == url_;

- }

- private:

- const GURL& url_;

-};

-bool URLIndexPrivateData::DeleteURL(const GURL& url) {

- // Find the matching entry in the history_info_map_.

- HistoryInfoMap::iterator pos = std::find_if(

- history_info_map_.begin(),

- history_info_map_.end(),

- HistoryInfoMapItemHasURL(url));

- if (pos == history_info_map_.end())

- return false;

- RemoveRowFromIndex(pos->second);

- search_term_cache_.clear(); // This invalidates the cache.

- return true;

-// URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

-URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

- const HistoryInfoMap& history_info_map)

- : history_info_map_(history_info_map) {

-URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}

-bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

- const HistoryID h1,

- const HistoryID h2) {

- HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));

- if (entry1 == history_info_map_.end())

- return false;

- HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));

- if (entry2 == history_info_map_.end())

- return true;

- const URLRow& r1(entry1->second);

- const URLRow& r2(entry2->second);

- // First cut: typed count, visit count, recency.

- // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

- // recently visited (within the last 12/24 hours) as highly important. Get

- // input from mpearson.

- if (r1.typed_count() != r2.typed_count())

- return (r1.typed_count() > r2.typed_count());

- if (r1.visit_count() != r2.visit_count())

- return (r1.visit_count() > r2.visit_count());

- return (r1.last_visit() > r2.last_visit());

-// Cache Searching -------------------------------------------------------------

-// NOTE: This is the main public search function.

-ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

- const string16& search_string) {

- pre_filter_item_count_ = 0;

- post_filter_item_count_ = 0;

- post_scoring_item_count_ = 0;

- // The search string we receive may contain escaped characters. For reducing

- // the index we need individual, lower-cased words, ignoring escapings. For

- // the final filtering we need whitespace separated substrings possibly

- // containing escaped characters.

- string16 lower_raw_string(base::i18n::ToLower(search_string));

- string16 lower_unescaped_string =

- net::UnescapeURLComponent(lower_raw_string,

- net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS);

- // Extract individual 'words' (as opposed to 'terms'; see below) from the

- // search string. When the user types "colspec=ID%20Mstone Release" we get

- // four 'words': "colspec", "id", "mstone" and "release".

- String16Vector lower_words(

- history::String16VectorFromString16(lower_unescaped_string, false, NULL));

- ScoredHistoryMatches scored_items;

- // Do nothing if we have indexed no words (probably because we've not been

- // initialized yet) or the search string has no words.

- if (word_list_.empty() || lower_words.empty()) {

- search_term_cache_.clear(); // Invalidate the term cache.

- return scored_items;

- }

- // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

- // approach.

- ResetSearchTermCache();

- HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

- // Trim the candidate pool if it is large. Note that we do not filter out

- // items that do not contain the search terms as proper substrings -- doing

- // so is the performance-costly operation we are trying to avoid in order

- // to maintain omnibox responsiveness.

- const size_t kItemsToScoreLimit = 500;

- pre_filter_item_count_ = history_id_set.size();

- // If we trim the results set we do not want to cache the results for next

- // time as the user's ultimately desired result could easily be eliminated

- // in this early rough filter.

- bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

- if (was_trimmed) {

- HistoryIDVector history_ids;

- std::copy(history_id_set.begin(), history_id_set.end(),

- std::back_inserter(history_ids));

- // Trim down the set by sorting by typed-count, visit-count, and last

- // visit.

- HistoryItemFactorGreater

- item_factor_functor(history_info_map_);

- std::partial_sort(history_ids.begin(),

- history_ids.begin() + kItemsToScoreLimit,

- history_ids.end(),

- item_factor_functor);

- history_id_set.clear();

- std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

- std::inserter(history_id_set, history_id_set.end()));

- post_filter_item_count_ = history_id_set.size();

- }

- // Pass over all of the candidates filtering out any without a proper

- // substring match, inserting those which pass in order by score. Note that

- // in this step we are using the raw search string complete with escaped

- // URL elements. When the user has specifically typed something akin to

- // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

- // specific substring appears in the URL or page title.

- // We call these 'terms' (as opposed to 'words'; see above) as in this case

- // we only want to break up the search string on 'true' whitespace rather than

- // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

- // get two 'terms': "colspec=id%20mstone" and "release".

- history::String16Vector lower_raw_terms;

- Tokenize(lower_raw_string, kWhitespaceUTF16, &lower_raw_terms);

- scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),

- AddHistoryMatch(*this, lower_raw_string,

- lower_raw_terms, base::Time::Now())).ScoredMatches();

- // Select and sort only the top kMaxMatches results.

- if (scored_items.size() > AutocompleteProvider::kMaxMatches) {

- std::partial_sort(scored_items.begin(),

- scored_items.begin() +

- AutocompleteProvider::kMaxMatches,

- scored_items.end(),

- ScoredHistoryMatch::MatchScoreGreater);

- scored_items.resize(AutocompleteProvider::kMaxMatches);

- } else {

- std::sort(scored_items.begin(), scored_items.end(),

- ScoredHistoryMatch::MatchScoreGreater);

- }

- post_scoring_item_count_ = scored_items.size();

- if (was_trimmed) {

- search_term_cache_.clear(); // Invalidate the term cache.

- } else {

- // Remove any stale SearchTermCacheItems.

- for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

- cache_iter != search_term_cache_.end(); ) {

- if (!cache_iter->second.used_)

- search_term_cache_.erase(cache_iter++);

- else

- ++cache_iter;

- }

- return scored_items;

-// URLIndexPrivateData::AddHistoryMatch ----------------------------------------

-URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

- const URLIndexPrivateData& private_data,

- const string16& lower_string,

- const String16Vector& lower_terms,

- const base::Time now)

- : private_data_(private_data),

- lower_string_(lower_string),

- lower_terms_(lower_terms),

- now_(now) {}

-URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {}

-void URLIndexPrivateData::AddHistoryMatch::operator()(

- const HistoryID history_id) {

- HistoryInfoMap::const_iterator hist_pos =

- private_data_.history_info_map_.find(history_id);

- if (hist_pos != private_data_.history_info_map_.end()) {

- const URLRow& hist_item = hist_pos->second;

- WordStartsMap::const_iterator starts_pos =

- private_data_.word_starts_map_.find(history_id);

- DCHECK(starts_pos != private_data_.word_starts_map_.end());

- ScoredHistoryMatch match(hist_item, lower_string_, lower_terms_,

- starts_pos->second, now_);

- if (match.raw_score > 0)

- scored_matches_.push_back(match);

- }

void URLIndexPrivateData::ResetSearchTermCache() {

for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();

iter != search_term_cache_.end(); ++iter)

@@ -730,418 +1053,35 @@

return word_id_set;

}

-// Cache Saving ----------------------------------------------------------------

-// static

-void URLIndexPrivateData::WritePrivateDataToCacheFileTask(

- scoped_refptr<URLIndexPrivateData> private_data,

- const FilePath& file_path,

- scoped_refptr<RefCountedBool> succeeded) {

- DCHECK(private_data.get());

- DCHECK(!file_path.empty());

- succeeded->set_value(private_data->SaveToFile(file_path));

-bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {

- base::TimeTicks beginning_time = base::TimeTicks::Now();

- InMemoryURLIndexCacheItem index_cache;

- SavePrivateData(&index_cache);

- std::string data;

- if (!index_cache.SerializeToString(&data)) {

- LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";

+bool URLIndexPrivateData::RestoreFromCache(InMemoryURLCacheDatabase* cache_db) {

+ DCHECK(cache_db);

+ if (!cache_db->RestorePrivateData(this)) {

+ cache_db->Reset();

return false;

}

- int size = data.size();

- if (file_util::WriteFile(file_path, data.c_str(), size) != size) {

- LOG(WARNING) << "Failed to write " << file_path.value();

- return false;

- }

- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",

- base::TimeTicks::Now() - beginning_time);

- return true;

+ return !Empty() && ValidateConsistency();

}

-void URLIndexPrivateData::SavePrivateData(

- InMemoryURLIndexCacheItem* cache) const {

- DCHECK(cache);

- cache->set_timestamp(base::Time::Now().ToInternalValue());

- cache->set_version(saved_cache_version_);

- // history_item_count_ is no longer used but rather than change the protobuf

- // definition use a placeholder. This will go away with the switch to SQLite.

- cache->set_history_item_count(0);

- SaveWordList(cache);

- SaveWordMap(cache);

- SaveCharWordMap(cache);

- SaveWordIDHistoryMap(cache);

- SaveHistoryInfoMap(cache);

- SaveWordStartsMap(cache);

-void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

- if (word_list_.empty())

+void URLIndexPrivateData::DeleteOldVersionCacheFile() const {

+ if (history_dir_.empty())

return;

- WordListItem* list_item = cache->mutable_word_list();

- list_item->set_word_count(word_list_.size());

- for (String16Vector::const_iterator iter = word_list_.begin();

- iter != word_list_.end(); ++iter)

- list_item->add_word(UTF16ToUTF8(*iter));

+ FilePath path = history_dir_.Append(chrome::kHQPCacheFilename);

+ file_util::Delete(path, false);

}

-void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {

- if (word_map_.empty())

- return;

- WordMapItem* map_item = cache->mutable_word_map();

- map_item->set_item_count(word_map_.size());

- for (WordMap::const_iterator iter = word_map_.begin();

- iter != word_map_.end(); ++iter) {

- WordMapEntry* map_entry = map_item->add_word_map_entry();

- map_entry->set_word(UTF16ToUTF8(iter->first));

- map_entry->set_word_id(iter->second);

- }

-void URLIndexPrivateData::SaveCharWordMap(

- InMemoryURLIndexCacheItem* cache) const {

- if (char_word_map_.empty())

- return;

- CharWordMapItem* map_item = cache->mutable_char_word_map();

- map_item->set_item_count(char_word_map_.size());

- for (CharWordIDMap::const_iterator iter = char_word_map_.begin();

- iter != char_word_map_.end(); ++iter) {

- CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();

- map_entry->set_char_16(iter->first);

- const WordIDSet& word_id_set(iter->second);

- map_entry->set_item_count(word_id_set.size());

- for (WordIDSet::const_iterator set_iter = word_id_set.begin();

- set_iter != word_id_set.end(); ++set_iter)

- map_entry->add_word_id(*set_iter);

- }

-void URLIndexPrivateData::SaveWordIDHistoryMap(

- InMemoryURLIndexCacheItem* cache) const {

- if (word_id_history_map_.empty())

- return;

- WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();

- map_item->set_item_count(word_id_history_map_.size());

- for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();

- iter != word_id_history_map_.end(); ++iter) {

- WordIDHistoryMapEntry* map_entry =

- map_item->add_word_id_history_map_entry();

- map_entry->set_word_id(iter->first);

- const HistoryIDSet& history_id_set(iter->second);

- map_entry->set_item_count(history_id_set.size());

- for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();

- set_iter != history_id_set.end(); ++set_iter)

- map_entry->add_history_id(*set_iter);

- }

-void URLIndexPrivateData::SaveHistoryInfoMap(

- InMemoryURLIndexCacheItem* cache) const {

- if (history_info_map_.empty())

- return;

- HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

- map_item->set_item_count(history_info_map_.size());

- for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

- iter != history_info_map_.end(); ++iter) {

- HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

- map_entry->set_history_id(iter->first);

- const URLRow& url_row(iter->second);

- // Note: We only save information that contributes to the index so there

- // is no need to save search_term_cache_ (not persistent).

- map_entry->set_visit_count(url_row.visit_count());

- map_entry->set_typed_count(url_row.typed_count());

- map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

- map_entry->set_url(url_row.url().spec());

- map_entry->set_title(UTF16ToUTF8(url_row.title()));

- }

-void URLIndexPrivateData::SaveWordStartsMap(

- InMemoryURLIndexCacheItem* cache) const {

- if (word_starts_map_.empty())

- return;

- // For unit testing: Enable saving of the cache as an earlier version to

- // allow testing of cache file upgrading in ReadFromFile().

- // TODO(mrossetti): Instead of intruding on production code with this kind of

- // test harness, save a copy of an older version cache with known results.

- // Implement this when switching the caching over to SQLite.

- if (saved_cache_version_ < 1)

- return;

- WordStartsMapItem* map_item = cache->mutable_word_starts_map();

- map_item->set_item_count(word_starts_map_.size());

- for (WordStartsMap::const_iterator iter = word_starts_map_.begin();

- iter != word_starts_map_.end(); ++iter) {

- WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();

- map_entry->set_history_id(iter->first);

- const RowWordStarts& word_starts(iter->second);

- for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();

- i != word_starts.url_word_starts_.end(); ++i)

- map_entry->add_url_word_starts(*i);

- for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();

- i != word_starts.title_word_starts_.end(); ++i)

- map_entry->add_title_word_starts(*i);

- }

-// Cache Restoring -------------------------------------------------------------

-// static

-void URLIndexPrivateData::RestoreFromFileTask(

- const FilePath& file_path,

- scoped_refptr<URLIndexPrivateData> private_data,

- const std::string& languages) {

- private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);

-// static

-scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(

- const FilePath& file_path,

- const std::string& languages) {

- base::TimeTicks beginning_time = base::TimeTicks::Now();

- if (!file_util::PathExists(file_path))

- return NULL;

- std::string data;

- // If there is no cache file then simply give up. This will cause us to

- // attempt to rebuild from the history database.

- if (!file_util::ReadFileToString(file_path, &data))

- return NULL;

- scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);

- InMemoryURLIndexCacheItem index_cache;

- if (!index_cache.ParseFromArray(data.c_str(), data.size())) {

- LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "

- << file_path.value();

- return restored_data;

- }

- if (!restored_data->RestorePrivateData(index_cache, languages))

- return NULL;

- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

- base::TimeTicks::Now() - beginning_time);

- UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

- restored_data->history_id_word_map_.size());

- UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

- restored_data->word_map_.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

- restored_data->char_word_map_.size());

- if (restored_data->Empty())

- return NULL; // 'No data' is the same as a failed reload.

- return restored_data;

-// static

-scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

- HistoryDatabase* history_db,

- const std::string& languages,

- const std::set<std::string>& scheme_whitelist) {

- if (!history_db)

- return NULL;

- base::TimeTicks beginning_time = base::TimeTicks::Now();

- scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);

- URLDatabase::URLEnumerator history_enum;

- if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

- return NULL;

- for (URLRow row; history_enum.GetNextURL(&row); )

- rebuilt_data->IndexRow(row, languages, scheme_whitelist);

- UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

- base::TimeTicks::Now() - beginning_time);

- UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

- rebuilt_data->history_id_word_map_.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

- rebuilt_data->word_map_.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

- rebuilt_data->char_word_map_.size());

- return rebuilt_data;

-bool URLIndexPrivateData::RestorePrivateData(

- const InMemoryURLIndexCacheItem& cache,

- const std::string& languages) {

- if (cache.has_version())

- restored_cache_version_ = cache.version();

- return RestoreWordList(cache) && RestoreWordMap(cache) &&

- RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

- RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);

-bool URLIndexPrivateData::RestoreWordList(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_word_list())

+bool URLIndexPrivateData::GetCacheDBPath(FilePath* file_path) {

+ DCHECK(file_path);

+ if (history_dir_.empty())

return false;

- const WordListItem& list_item(cache.word_list());

- uint32 expected_item_count = list_item.word_count();

- uint32 actual_item_count = list_item.word_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<std::string>& words(list_item.word());

- for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();

- iter != words.end(); ++iter)

- word_list_.push_back(UTF8ToUTF16(*iter));

+ *file_path = history_dir_.Append(chrome::kHQPCacheDBFilename);

return true;

}

-bool URLIndexPrivateData::RestoreWordMap(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_word_map())

- return false;

- const WordMapItem& list_item(cache.word_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.word_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());

- for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();

- iter != entries.end(); ++iter)

- word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();

- return true;

+void URLIndexPrivateData::SetCacheDatabaseForTesting(

+ InMemoryURLCacheDatabase* test_db) {

+ cache_db_ = test_db;

}

-bool URLIndexPrivateData::RestoreCharWordMap(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_char_word_map())

- return false;

- const CharWordMapItem& list_item(cache.char_word_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.char_word_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<CharWordMapEntry>&

- entries(list_item.char_word_map_entry());

- for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =

- entries.begin(); iter != entries.end(); ++iter) {

- expected_item_count = iter->item_count();

- actual_item_count = iter->word_id_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- char16 uni_char = static_cast<char16>(iter->char_16());

- WordIDSet word_id_set;

- const RepeatedField<int32>& word_ids(iter->word_id());

- for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();

- jiter != word_ids.end(); ++jiter)

- word_id_set.insert(*jiter);

- char_word_map_[uni_char] = word_id_set;

- }

- return true;

-bool URLIndexPrivateData::RestoreWordIDHistoryMap(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_word_id_history_map())

- return false;

- const WordIDHistoryMapItem& list_item(cache.word_id_history_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.word_id_history_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<WordIDHistoryMapEntry>&

- entries(list_item.word_id_history_map_entry());

- for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =

- entries.begin(); iter != entries.end(); ++iter) {

- expected_item_count = iter->item_count();

- actual_item_count = iter->history_id_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- WordID word_id = iter->word_id();

- HistoryIDSet history_id_set;

- const RepeatedField<int64>& history_ids(iter->history_id());

- for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();

- jiter != history_ids.end(); ++jiter) {

- history_id_set.insert(*jiter);

- AddToHistoryIDWordMap(*jiter, word_id);

- }

- word_id_history_map_[word_id] = history_id_set;

- }

- return true;

-bool URLIndexPrivateData::RestoreHistoryInfoMap(

- const InMemoryURLIndexCacheItem& cache) {

- if (!cache.has_history_info_map())

- return false;

- const HistoryInfoMapItem& list_item(cache.history_info_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.history_info_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<HistoryInfoMapEntry>&

- entries(list_item.history_info_map_entry());

- for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =

- entries.begin(); iter != entries.end(); ++iter) {

- HistoryID history_id = iter->history_id();

- GURL url(iter->url());

- URLRow url_row(url, history_id);

- url_row.set_visit_count(iter->visit_count());

- url_row.set_typed_count(iter->typed_count());

- url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));

- if (iter->has_title()) {

- string16 title(UTF8ToUTF16(iter->title()));

- url_row.set_title(title);

- }

- history_info_map_[history_id] = url_row;

- }

- return true;

-bool URLIndexPrivateData::RestoreWordStartsMap(

- const InMemoryURLIndexCacheItem& cache,

- const std::string& languages) {

- // Note that this function must be called after RestoreHistoryInfoMap() has

- // been run as the word starts may have to be recalculated from the urls and

- // page titles.

- if (cache.has_word_starts_map()) {

- const WordStartsMapItem& list_item(cache.word_starts_map());

- uint32 expected_item_count = list_item.item_count();

- uint32 actual_item_count = list_item.word_starts_map_entry_size();

- if (actual_item_count == 0 || actual_item_count != expected_item_count)

- return false;

- const RepeatedPtrField<WordStartsMapEntry>&

- entries(list_item.word_starts_map_entry());

- for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =

- entries.begin(); iter != entries.end(); ++iter) {

- HistoryID history_id = iter->history_id();

- RowWordStarts word_starts;

- // Restore the URL word starts.

- const RepeatedField<int32>& url_starts(iter->url_word_starts());

- for (RepeatedField<int32>::const_iterator jiter = url_starts.begin();

- jiter != url_starts.end(); ++jiter)

- word_starts.url_word_starts_.push_back(*jiter);

- // Restore the page title word starts.

- const RepeatedField<int32>& title_starts(iter->title_word_starts());

- for (RepeatedField<int32>::const_iterator jiter = title_starts.begin();

- jiter != title_starts.end(); ++jiter)

- word_starts.title_word_starts_.push_back(*jiter);

- word_starts_map_[history_id] = word_starts;

- }

- } else {

- // Since the cache did not contain any word starts we must rebuild then from

- // the URL and page titles.

- for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

- iter != history_info_map_.end(); ++iter) {

- RowWordStarts word_starts;

- const URLRow& row(iter->second);

- string16 url(net::FormatUrl(row.url(), languages,

- net::kFormatUrlOmitUsernamePassword,

- net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,

- NULL, NULL, NULL));

- url = base::i18n::ToLower(url);

- String16VectorFromString16(url, false, &word_starts.url_word_starts_);

- String16VectorFromString16(

- row.title(), false, &word_starts.title_word_starts_);

- word_starts_map_[iter->first] = word_starts;

- }

- return true;

// static

bool URLIndexPrivateData::URLSchemeIsWhitelisted(

const GURL& gurl,

« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | chrome/browser/history/url_index_private_data_unittest.cc » ('j') | no next file with comments »