chrome/browser/history/in_memory_url_index.cc - Issue 8120004: HQP Refactoring (in Preparation for SQLite Cache)

Unified Diff: chrome/browser/history/in_memory_url_index.cc

Issue 8120004: HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 9 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/browser/history/history.cc ('K') | « chrome/browser/history/in_memory_url_index.h ('k') | chrome/browser/history/in_memory_url_index_types.h » ('j') | chrome/browser/history/in_memory_url_index_types.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/history/in_memory_url_index.cc

===================================================================

--- chrome/browser/history/in_memory_url_index.cc (revision 103916)

+++ chrome/browser/history/in_memory_url_index.cc (working copy)

@@ -11,18 +11,20 @@

#include <numeric>

#include "base/file_util.h"

-#include "base/i18n/break_iterator.h"

#include "base/i18n/case_conversion.h"

#include "base/metrics/histogram.h"

-#include "base/string_util.h"

#include "base/threading/thread_restrictions.h"

#include "base/time.h"

#include "base/utf_string_conversions.h"

#include "chrome/browser/autocomplete/autocomplete.h"

#include "chrome/browser/autocomplete/history_provider_util.h"

+#include "chrome/browser/history/history_notifications.h"

#include "chrome/browser/history/url_database.h"

#include "chrome/browser/profiles/profile.h"

+#include "chrome/common/chrome_notification_types.h"

#include "chrome/common/url_constants.h"

+#include "content/common/notification_details.h"

+#include "content/common/notification_source.h"

#include "googleurl/src/url_parse.h"

#include "googleurl/src/url_util.h"

#include "net/base/escape.h"

@@ -59,23 +61,6 @@

// each of these scores for each factor.

const int kScoreRank[] = { 1425, 1200, 900, 400 };

-ScoredHistoryMatch::ScoredHistoryMatch()

- : raw_score(0),

- can_inline(false) {}

-ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)

- : HistoryMatch(url_info, 0, false, false),

- raw_score(0),

- can_inline(false) {}

-ScoredHistoryMatch::~ScoredHistoryMatch() {}

-// Comparison function for sorting ScoredMatches by their scores.

-bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,

- const ScoredHistoryMatch& m2) {

- return m1.raw_score >= m2.raw_score;

InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(

const WordIDSet& word_id_set,

const HistoryIDSet& history_id_set)

@@ -88,11 +73,6 @@

InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {}

-// Comparison function for sorting TermMatches by their offsets.

-bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {

- return m1.offset < m2.offset;

// Comparison function for sorting search terms by descending length.

bool LengthGreater(const string16& string_a, const string16& string_b) {

return string_a.length() > string_b.length();

@@ -135,15 +115,23 @@

return score;

}

-InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)

+InMemoryURLIndex::InMemoryURLIndex(Profile* profile,

+ const FilePath& history_dir)

: history_dir_(history_dir),

- history_item_count_(0) {

+ private_data_(new URLIndexPrivateData) {

InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);

+ if (profile) {

+ Source<Profile> source(profile);

+ registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source);

+ registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED,

+ source);

+ registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source);

+ }

}

// Called only by unit tests.

InMemoryURLIndex::InMemoryURLIndex()

- : history_item_count_(0) {

+ : private_data_(new URLIndexPrivateData) {

InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);

}

@@ -164,7 +152,7 @@

// Indexing

-bool InMemoryURLIndex::Init(history::URLDatabase* history_db,

+bool InMemoryURLIndex::Init(URLDatabase* history_db,

const std::string& languages) {

// TODO(mrossetti): Register for profile/language change notifications.

languages_ = languages;

@@ -172,10 +160,52 @@

}

void InMemoryURLIndex::ShutDown() {

+ registrar_.RemoveAll();

// Write our cache.

SaveToCacheFile();

}

+void InMemoryURLIndex::Observe(int type,

+ const NotificationSource& source,

+ const NotificationDetails& details) {

+ switch (type) {

+ case chrome::NOTIFICATION_HISTORY_URL_VISITED:

+ OnURLVisited(Details<URLVisitedDetails>(details).ptr());

+ break;

+ case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED:

+ OnURLsModified(Details<history::URLsModifiedDetails>(details).ptr());

+ break;

+ case chrome::NOTIFICATION_HISTORY_URLS_DELETED:

+ OnURLsDeleted(Details<history::URLsDeletedDetails>(details).ptr());

+ break;

+ default:

+ // For simplicity, the unit tests send us all notifications, even when

+ // we haven't registered for them, so don't assert here.

+ break;

+ }

+void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) {

+ UpdateURL(details->row);

+void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) {

+ for (std::vector<history::URLRow>::const_iterator row =

+ details->changed_urls.begin();

+ row != details->changed_urls.end(); ++row)

+ UpdateURL(*row);

+void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) {

+ if (details->all_history) {

+ ClearPrivateData();

+ } else {

+ for (std::vector<URLRow>::const_iterator row = details->rows.begin();

+ row != details->rows.end(); ++row)

+ DeleteURL(*row);

+ }

bool InMemoryURLIndex::IndexRow(const URLRow& row) {

const GURL& gurl(row.url());

@@ -183,26 +213,40 @@

if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))

return true;

+ URLID row_id = row.id();

+ // Strip out username and password before saving and indexing.

string16 url(net::FormatUrl(gurl, languages_,

net::kFormatUrlOmitUsernamePassword,

UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,

NULL, NULL, NULL));

+ HistoryID history_id = static_cast<HistoryID>(row_id);

+ DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

- HistoryID history_id = static_cast<HistoryID>(row.id());

- DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max());

// Add the row for quick lookup in the history info store.

- URLRow new_row(GURL(url), row.id());

+ URLRow new_row(GURL(url), row_id);

new_row.set_visit_count(row.visit_count());

new_row.set_typed_count(row.typed_count());

new_row.set_last_visit(row.last_visit());

new_row.set_title(row.title());

- history_info_map_[history_id] = new_row;

+ private_data_->history_info_map_[history_id] = new_row;

+ // Index the words contained in the URL and title of the row.

+ AddRowWordsToIndex(new_row);

+ ++(private_data_->history_item_count_);

+ return true;

+void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) {

+ HistoryID history_id = static_cast<HistoryID>(row.id());

// Split URL into individual, unique words then add in the title words.

+ const GURL& gurl(row.url());

+ string16 url(net::FormatUrl(gurl, languages_,

+ net::kFormatUrlOmitUsernamePassword,

+ UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,

+ NULL, NULL, NULL));

url = base::i18n::ToLower(url);

- String16Set url_words = WordSetFromString16(url);

- String16Set title_words = WordSetFromString16(row.title());

+ String16Set url_words = String16SetFromString16(url);

+ String16Set title_words = String16SetFromString16(row.title());

String16Set words;

std::set_union(url_words.begin(), url_words.end(),

title_words.begin(), title_words.end(),

@@ -211,10 +255,60 @@

word_iter != words.end(); ++word_iter)

AddWordToIndex(*word_iter, history_id);

- ++history_item_count_;

- return true;

+ search_term_cache_.clear(); // Invalidate the term cache.

}

+void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) {

+ RemoveRowWordsFromIndex(row);

+ HistoryID history_id = static_cast<HistoryID>(row.id());

+ private_data_->history_info_map_.erase(history_id);

+void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) {

+ // Remove the entries in history_id_word_map_ and word_id_history_map_ for

+ // this row.

+ URLIndexPrivateData& private_data(*(private_data_.get()));

+ HistoryID history_id = static_cast<HistoryID>(row.id());

+ WordIDSet word_id_set = private_data.history_id_word_map_[history_id];

+ private_data.history_id_word_map_.erase(history_id);

+ // Reconcile any changes to word usage.

+ for (WordIDSet::iterator word_id_iter = word_id_set.begin();

+ word_id_iter != word_id_set.end(); ++word_id_iter) {

+ WordID word_id = *word_id_iter;

+ HistoryIDSet history_ids = private_data.word_id_history_map_[word_id];

Peter Kasting 2011/10/07 17:50:37 Why do you copy this out here, modify the copy, an

mrossetti 2011/10/07 22:24:37 Yeah, great catch! There used to be some more code

+ history_ids.erase(history_id);

+ if (!history_ids.empty()) {

+ // The word is still in use.

+ private_data.word_id_history_map_[word_id] = history_ids;

+ continue;

+ }

+ // The word is no longer in use. Reconcile any changes to character usage.

+ string16 word = private_data.word_list_[word_id];

+ Char16Set characters = Char16SetFromString16(word);

+ for (Char16Set::iterator uni_char_iter = characters.begin();

+ uni_char_iter != characters.end(); ++uni_char_iter) {

+ char16 uni_char = *uni_char_iter;

+ WordIDSet word_ids = private_data.char_word_map_[uni_char];

Peter Kasting 2011/10/07 17:50:37 Same comment.

mrossetti 2011/10/07 22:24:37 Done.

+ word_ids.erase(word_id);

+ if (!word_ids.empty()) {

+ // The character is still in use.

+ private_data.char_word_map_[uni_char] = word_ids;

+ continue;

+ }

+ // The character is no longer in use.

+ private_data.char_word_map_.erase(uni_char);

+ }

+ // Complete the removal of references to the word.

+ private_data.word_id_history_map_.erase(word_id);

+ private_data.word_map_.erase(word);

+ private_data.word_list_[word_id] = string16();

+ private_data.available_words_.insert(word_id);

+ }

bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,

bool clear_cache) {

ClearPrivateData();

@@ -241,12 +335,7 @@

}

void InMemoryURLIndex::ClearPrivateData() {

- history_item_count_ = 0;

- word_list_.clear();

- word_map_.clear();

- char_word_map_.clear();

- word_id_history_map_.clear();

- history_info_map_.clear();

+ private_data_->Clear();

search_term_cache_.clear();

}

@@ -282,10 +371,13 @@

UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

base::TimeTicks::Now() - beginning_time);

- UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_);

+ UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

+ private_data_->history_item_count_);

UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());

- UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());

+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

+ private_data_->word_map_.size());

+ UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

+ private_data_->char_word_map_.size());

return true;

}

@@ -317,13 +409,14 @@

return true;

}

-void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {

+void InMemoryURLIndex::UpdateURL(const URLRow& row) {

// The row may or may not already be in our index. If it is not already

// indexed and it qualifies then it gets indexed. If it is already

// indexed and still qualifies then it gets updated, otherwise it

// is deleted from the index.

- HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

- if (row_pos == history_info_map_.end()) {

+ HistoryInfoMap::iterator row_pos =

+ private_data_->history_info_map_.find(row.id());

+ if (row_pos == private_data_->history_info_map_.end()) {

// This new row should be indexed if it qualifies.

if (RowQualifiesAsSignificant(row, base::Time()))

IndexRow(row);

@@ -335,25 +428,26 @@

old_row.set_visit_count(row.visit_count());

old_row.set_typed_count(row.typed_count());

old_row.set_last_visit(row.last_visit());

- // TODO(mrossetti): When we start indexing the title the next line

- // will need attention.

- old_row.set_title(row.title());

+ // While the URL is guaranteed to remain stable, the title may have change.

Peter Kasting 2011/10/07 17:50:37 Nit: change -> changed

mrossetti 2011/10/07 22:24:37 Done.

+ // If so, then we need to update the index with the changed words.

+ if (old_row.title() != row.title()) {

+ // Clear all words associated with this row and re-index both the

+ // URL and title.

+ RemoveRowWordsFromIndex(row);

+ old_row.set_title(row.title());

+ AddRowWordsToIndex(old_row);

+ }

} else {

- // This indexed row no longer qualifies and will be de-indexed.

- history_info_map_.erase(row_id);

+ // This indexed row no longer qualifies and will be de-indexed by

+ // clearing all words associated with this row.

+ RemoveRowFromIndex(row);

}

// This invalidates the cache.

search_term_cache_.clear();

- // TODO(mrossetti): Record this transaction in the cache.

}

-void InMemoryURLIndex::DeleteURL(URLID row_id) {

- // Note that this does not remove any reference to this row from the

- // word_id_history_map_. That map will continue to contain (and return)

- // hits against this row until that map is rebuilt, but since the

- // history_info_map_ no longer references the row no erroneous results

- // will propagate to the user.

- history_info_map_.erase(row_id);

+void InMemoryURLIndex::DeleteURL(const URLRow& row) {

+ RemoveRowFromIndex(row);

// This invalidates the word cache.

search_term_cache_.clear();

// TODO(mrossetti): Record this transaction in the cache.

@@ -364,6 +458,12 @@

ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(

const String16Vector& terms) {

ScoredHistoryMatches scored_items;

+ // Do nothing if we have indexed no words (probably because we've not been

+ // initialized yet).

+ if (private_data_->word_list_.empty())

+ return scored_items;

if (!terms.empty()) {

// Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

// approach.

@@ -421,7 +521,7 @@

iter->second.used_ = false;

}

-InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(

+HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(

const string16& uni_string) {

// Break the terms down into individual terms (words), get the candidate

// set for each term, and intersect each to get a final candidate list.

@@ -429,7 +529,7 @@

// a string like "http://www.somewebsite.com" which, from our perspective,

// is four words: 'http', 'www', 'somewebsite', and 'com'.

HistoryIDSet history_id_set;

- String16Vector terms = WordVectorFromString16(uni_string, true);

+ String16Vector terms = String16VectorFromString16(uni_string, true);

// Sort the terms into the longest first as such are likely to narrow down

// the results quicker. Also, single character terms are the most expensive

// to process so save them for last.

@@ -456,7 +556,7 @@

return history_id_set;

}

-InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(

+HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(

const string16& term) {

if (term.empty())

return HistoryIDSet();

@@ -466,7 +566,7 @@

// occurring words in the user's searches.

size_t term_length = term.length();

- InMemoryURLIndex::WordIDSet word_id_set;

+ WordIDSet word_id_set;

if (term_length > 1) {

// See if this term or a prefix thereof is present in the cache.

SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());

@@ -512,7 +612,8 @@

// Reduce the word set with any leftover, unprocessed characters.

if (!unique_chars.empty()) {

- WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));

+ WordIDSet leftover_set(

+ private_data_->WordIDSetForTermChars(unique_chars));

// We might come up empty on the leftovers.

if (leftover_set.empty()) {

search_term_cache_[term] = SearchTermCacheItem();

@@ -535,13 +636,15 @@

// contains words which do not have the search term as a proper subset.

for (WordIDSet::iterator word_set_iter = word_id_set.begin();

word_set_iter != word_id_set.end(); ) {

- if (word_list_[*word_set_iter].find(term) == string16::npos)

+ if (private_data_->word_list_[*word_set_iter].find(term) ==

+ string16::npos)

word_id_set.erase(word_set_iter++);

else

++word_set_iter;

}

} else {

- word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));

+ word_id_set =

+ private_data_->WordIDSetForTermChars(Char16SetFromString16(term));

}

// If any words resulted then we can compose a set of history IDs by unioning

@@ -551,8 +654,9 @@

for (WordIDSet::iterator word_id_iter = word_id_set.begin();

word_id_iter != word_id_set.end(); ++word_id_iter) {

WordID word_id = *word_id_iter;

- WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);

- if (word_iter != word_id_history_map_.end()) {

+ WordIDHistoryMap::iterator word_iter =

+ private_data_->word_id_history_map_.find(word_id);

+ if (word_iter != private_data_->word_id_history_map_.end()) {

HistoryIDSet& word_history_id_set(word_iter->second);

history_id_set.insert(word_history_id_set.begin(),

word_history_id_set.end());

@@ -570,85 +674,53 @@

// Utility Functions

-// static

-InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(

- const string16& uni_string) {

- const size_t kMaxWordLength = 64;

- String16Vector words = WordVectorFromString16(uni_string, false);

- String16Set word_set;

- for (String16Vector::const_iterator iter = words.begin(); iter != words.end();

- ++iter)

- word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));

- return word_set;

-// static

-InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(

- const string16& uni_string,

- bool break_on_space) {

- base::i18n::BreakIterator iter(

- uni_string,

- break_on_space ? base::i18n::BreakIterator::BREAK_SPACE

- : base::i18n::BreakIterator::BREAK_WORD);

- String16Vector words;

- if (!iter.Init())

- return words;

- while (iter.Advance()) {

- if (break_on_space || iter.IsWord()) {

- string16 word = iter.GetString();

- if (break_on_space)

- TrimWhitespace(word, TRIM_ALL, &word);

- if (!word.empty())

- words.push_back(word);

- }

- return words;

-// static

-InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(

- const string16& term) {

- Char16Set characters;

- for (string16::const_iterator iter = term.begin(); iter != term.end();

- ++iter)

- characters.insert(*iter);

- return characters;

void InMemoryURLIndex::AddWordToIndex(const string16& term,

HistoryID history_id) {

- WordMap::iterator word_pos = word_map_.find(term);

- if (word_pos != word_map_.end())

+ WordMap::iterator word_pos = private_data_->word_map_.find(term);

+ if (word_pos != private_data_->word_map_.end())

UpdateWordHistory(word_pos->second, history_id);

else

AddWordHistory(term, history_id);

}

void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {

- WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);

- DCHECK(history_pos != word_id_history_map_.end());

- HistoryIDSet& history_id_set(history_pos->second);

- history_id_set.insert(history_id);

+ WordIDHistoryMap::iterator history_pos =

+ private_data_->word_id_history_map_.find(word_id);

+ DCHECK(history_pos != private_data_->word_id_history_map_.end());

+ HistoryIDSet& history_id_set(history_pos->second);

+ history_id_set.insert(history_id);

+ private_data_->AddToHistoryIDWordMap(history_id, word_id);

}

// Add a new word to the word list and the word map, and then create a

// new entry in the word/history map.

void InMemoryURLIndex::AddWordHistory(const string16& term,

HistoryID history_id) {

- word_list_.push_back(term);

- WordID word_id = word_list_.size() - 1;

- word_map_[term] = word_id;

+ URLIndexPrivateData& private_data(*(private_data_.get()));

+ WordID word_id = private_data.word_list_.size();

+ if (private_data.available_words_.empty()) {

+ private_data.word_list_.push_back(term);

+ } else {

+ word_id = *(private_data.available_words_.begin());

+ private_data.word_list_[word_id] = term;

+ private_data.available_words_.erase(word_id);

+ }

+ private_data.word_map_[term] = word_id;

HistoryIDSet history_id_set;

history_id_set.insert(history_id);

- word_id_history_map_[word_id] = history_id_set;

+ private_data.word_id_history_map_[word_id] = history_id_set;

+ private_data_->AddToHistoryIDWordMap(history_id, word_id);

// For each character in the newly added word (i.e. a word that is not

// already in the word index), add the word to the character index.

Char16Set characters = Char16SetFromString16(term);

for (Char16Set::iterator uni_char_iter = characters.begin();

uni_char_iter != characters.end(); ++uni_char_iter) {

char16 uni_char = *uni_char_iter;

- CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);

- if (char_iter != char_word_map_.end()) {

+ CharWordIDMap::iterator char_iter =

+ private_data.char_word_map_.find(uni_char);

+ if (char_iter != private_data.char_word_map_.end()) {

// Update existing entry in the char/word index.

WordIDSet& word_id_set(char_iter->second);

word_id_set.insert(word_id);

@@ -656,108 +728,13 @@

// Create a new entry in the char/word index.

WordIDSet word_id_set;

word_id_set.insert(word_id);

- char_word_map_[uni_char] = word_id_set;

+ private_data.char_word_map_[uni_char] = word_id_set;

}

-InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(

- const Char16Set& term_chars) {

- WordIDSet word_id_set;

- for (Char16Set::const_iterator c_iter = term_chars.begin();

- c_iter != term_chars.end(); ++c_iter) {

- CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);

- if (char_iter == char_word_map_.end()) {

- // A character was not found so there are no matching results: bail.

- word_id_set.clear();

- break;

- }

- WordIDSet& char_word_id_set(char_iter->second);

- // It is possible for there to no longer be any words associated with

- // a particular character. Give up in that case.

- if (char_word_id_set.empty()) {

- word_id_set.clear();

- break;

- }

- if (c_iter == term_chars.begin()) {

- // First character results becomes base set of results.

- word_id_set = char_word_id_set;

- } else {

- // Subsequent character results get intersected in.

- WordIDSet new_word_id_set;

- std::set_intersection(word_id_set.begin(), word_id_set.end(),

- char_word_id_set.begin(), char_word_id_set.end(),

- std::inserter(new_word_id_set,

- new_word_id_set.begin()));

- word_id_set.swap(new_word_id_set);

- }

- return word_id_set;

// static

-TermMatches InMemoryURLIndex::MatchTermInString(const string16& term,

- const string16& string,

- int term_num) {

- const size_t kMaxCompareLength = 2048;

- const string16& short_string = (string.length() > kMaxCompareLength) ?

- string.substr(0, kMaxCompareLength) : string;

- TermMatches matches;

- for (size_t location = short_string.find(term); location != string16::npos;

- location = short_string.find(term, location + 1)) {

- matches.push_back(TermMatch(term_num, location, term.length()));

- }

- return matches;

-// static

-TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {

- if (matches.empty())

- return matches;

- TermMatches sorted_matches = matches;

- std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);

- TermMatches clean_matches;

- TermMatch last_match = sorted_matches[0];

- clean_matches.push_back(last_match);

- for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;

- iter != sorted_matches.end(); ++iter) {

- if (iter->offset >= last_match.offset + last_match.length) {

- last_match = *iter;

- clean_matches.push_back(last_match);

- }

- return clean_matches;

-// static

-std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(

- const TermMatches& matches) {

- std::vector<size_t> offsets;

- for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)

- offsets.push_back(i->offset);

- return offsets;

-// static

-TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(

- const TermMatches& matches,

- const std::vector<size_t>& offsets) {

- DCHECK_EQ(matches.size(), offsets.size());

- TermMatches new_matches;

- std::vector<size_t>::const_iterator offset_iter = offsets.begin();

- for (TermMatches::const_iterator term_iter = matches.begin();

- term_iter != matches.end(); ++term_iter, ++offset_iter) {

- if (*offset_iter != string16::npos) {

- TermMatch new_match(*term_iter);

- new_match.offset = *offset_iter;

- new_matches.push_back(new_match);

- }

- return new_matches;

-// static

+// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.

ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(

const URLRow& row,

const String16Vector& terms) {

@@ -786,8 +763,8 @@

}

// Sort matches by offset and eliminate any which overlap.

- match.url_matches = SortAndDeoverlap(match.url_matches);

- match.title_matches = SortAndDeoverlap(match.title_matches);

+ match.url_matches = SortAndDeoverlapMatches(match.url_matches);

+ match.title_matches = SortAndDeoverlapMatches(match.title_matches);

// We should not (currently) inline autocomplete a result unless both of the

// following are true:

@@ -839,7 +816,6 @@

for (int i = 0; i < kSignificantFactors; ++i)

match.raw_score += factor[i];

match.raw_score /= kSignificantFactors;

return match;

}

@@ -901,19 +877,17 @@

const InMemoryURLIndex& index,

const String16Vector& lower_terms)

: index_(index),

- lower_terms_(lower_terms) {

+ lower_terms_(lower_terms) {}

InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}

-void InMemoryURLIndex::AddHistoryMatch::operator()(

- const InMemoryURLIndex::HistoryID history_id) {

+void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) {

HistoryInfoMap::const_iterator hist_pos =

- index_.history_info_map_.find(history_id);

+ index_.private_data_->history_info_map_.find(history_id);

// Note that a history_id may be present in the word_id_history_map_ yet not

// be found in the history_info_map_. This occurs when an item has been

// deleted by the user or the item no longer qualifies as a quick result.

- if (hist_pos != index_.history_info_map_.end()) {

+ if (hist_pos != index_.private_data_->history_info_map_.end()) {

const URLRow& hist_item = hist_pos->second;

ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));

if (match.raw_score > 0)

@@ -935,7 +909,7 @@

void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {

DCHECK(cache);

cache->set_timestamp(base::Time::Now().ToInternalValue());

- cache->set_history_item_count(history_item_count_);

+ cache->set_history_item_count(private_data_->history_item_count_);

SaveWordList(cache);

SaveWordMap(cache);

SaveCharWordMap(cache);

@@ -946,20 +920,19 @@

bool InMemoryURLIndex::RestorePrivateData(

const InMemoryURLIndexCacheItem& cache) {

last_saved_ = base::Time::FromInternalValue(cache.timestamp());

- history_item_count_ = cache.history_item_count();

- return (history_item_count_ == 0) || (RestoreWordList(cache) &&

+ private_data_->history_item_count_ = cache.history_item_count();

+ return (private_data_->history_item_count_ == 0) || (RestoreWordList(cache) &&

RestoreWordMap(cache) && RestoreCharWordMap(cache) &&

RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));

}

void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

- if (word_list_.empty())

+ if (private_data_->word_list_.empty())

return;

WordListItem* list_item = cache->mutable_word_list();

- list_item->set_word_count(word_list_.size());

- for (String16Vector::const_iterator iter = word_list_.begin();

- iter != word_list_.end(); ++iter)

+ list_item->set_word_count(private_data_->word_list_.size());

+ for (String16Vector::const_iterator iter = private_data_->word_list_.begin();

+ iter != private_data_->word_list_.end(); ++iter)

list_item->add_word(UTF16ToUTF8(*iter));

}

@@ -974,17 +947,17 @@

const RepeatedPtrField<std::string>& words(list_item.word());

for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();

iter != words.end(); ++iter)

- word_list_.push_back(UTF8ToUTF16(*iter));

+ private_data_->word_list_.push_back(UTF8ToUTF16(*iter));

return true;

}

void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {

- if (word_map_.empty())

+ if (private_data_->word_map_.empty())

return;

WordMapItem* map_item = cache->mutable_word_map();

- map_item->set_item_count(word_map_.size());

- for (WordMap::const_iterator iter = word_map_.begin();

- iter != word_map_.end(); ++iter) {

+ map_item->set_item_count(private_data_->word_map_.size());

+ for (WordMap::const_iterator iter = private_data_->word_map_.begin();

+ iter != private_data_->word_map_.end(); ++iter) {

WordMapEntry* map_entry = map_item->add_word_map_entry();

map_entry->set_word(UTF16ToUTF8(iter->first));

map_entry->set_word_id(iter->second);

@@ -1002,17 +975,18 @@

const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());

for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();

iter != entries.end(); ++iter)

- word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();

+ private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();

return true;

}

void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {

- if (char_word_map_.empty())

+ if (private_data_->char_word_map_.empty())

return;

CharWordMapItem* map_item = cache->mutable_char_word_map();

- map_item->set_item_count(char_word_map_.size());

- for (CharWordIDMap::const_iterator iter = char_word_map_.begin();

- iter != char_word_map_.end(); ++iter) {

+ map_item->set_item_count(private_data_->char_word_map_.size());

+ for (CharWordIDMap::const_iterator iter =

+ private_data_->char_word_map_.begin();

+ iter != private_data_->char_word_map_.end(); ++iter) {

CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();

map_entry->set_char_16(iter->first);

const WordIDSet& word_id_set(iter->second);

@@ -1046,19 +1020,20 @@

for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();

jiter != word_ids.end(); ++jiter)

word_id_set.insert(*jiter);

- char_word_map_[uni_char] = word_id_set;

+ private_data_->char_word_map_[uni_char] = word_id_set;

}

return true;

}

void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)

const {

- if (word_id_history_map_.empty())

+ if (private_data_->word_id_history_map_.empty())

return;

WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();

- map_item->set_item_count(word_id_history_map_.size());

- for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();

- iter != word_id_history_map_.end(); ++iter) {

+ map_item->set_item_count(private_data_->word_id_history_map_.size());

+ for (WordIDHistoryMap::const_iterator iter =

+ private_data_->word_id_history_map_.begin();

+ iter != private_data_->word_id_history_map_.end(); ++iter) {

WordIDHistoryMapEntry* map_entry =

map_item->add_word_id_history_map_entry();

map_entry->set_word_id(iter->first);

@@ -1091,21 +1066,24 @@

HistoryIDSet history_id_set;

const RepeatedField<int64>& history_ids(iter->history_id());

for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();

- jiter != history_ids.end(); ++jiter)

+ jiter != history_ids.end(); ++jiter) {

history_id_set.insert(*jiter);

- word_id_history_map_[word_id] = history_id_set;

+ private_data_->AddToHistoryIDWordMap(*jiter, word_id);

+ }

+ private_data_->word_id_history_map_[word_id] = history_id_set;

}

return true;

}

void InMemoryURLIndex::SaveHistoryInfoMap(

InMemoryURLIndexCacheItem* cache) const {

- if (history_info_map_.empty())

+ if (private_data_->history_info_map_.empty())

return;

HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

- map_item->set_item_count(history_info_map_.size());

- for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

- iter != history_info_map_.end(); ++iter) {

+ map_item->set_item_count(private_data_->history_info_map_.size());

+ for (HistoryInfoMap::const_iterator iter =

+ private_data_->history_info_map_.begin();

+ iter != private_data_->history_info_map_.end(); ++iter) {

HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

map_entry->set_history_id(iter->first);

const URLRow& url_row(iter->second);

@@ -1143,7 +1121,7 @@

string16 title(UTF8ToUTF16(iter->title()));

url_row.set_title(title);

}

- history_info_map_[history_id] = url_row;

+ private_data_->history_info_map_[history_id] = url_row;

}

return true;

}