Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: chrome/browser/history/in_memory_url_index.cc

Issue 8359019: Create Private Data for InMemoryURLIndex (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/in_memory_url_index.h" 5 #include "chrome/browser/history/in_memory_url_index.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 12
13 #include "base/file_util.h" 13 #include "base/file_util.h"
14 #include "base/i18n/break_iterator.h"
15 #include "base/i18n/case_conversion.h" 14 #include "base/i18n/case_conversion.h"
16 #include "base/metrics/histogram.h" 15 #include "base/metrics/histogram.h"
17 #include "base/string_util.h" 16 #include "base/string_util.h"
18 #include "base/threading/thread_restrictions.h" 17 #include "base/threading/thread_restrictions.h"
19 #include "base/time.h" 18 #include "base/time.h"
20 #include "base/utf_string_conversions.h" 19 #include "base/utf_string_conversions.h"
21 #include "chrome/browser/autocomplete/autocomplete.h" 20 #include "chrome/browser/autocomplete/autocomplete.h"
22 #include "chrome/browser/autocomplete/history_provider_util.h" 21 #include "chrome/browser/autocomplete/history_provider_util.h"
23 #include "chrome/browser/history/url_database.h" 22 #include "chrome/browser/history/url_database.h"
24 #include "chrome/browser/profiles/profile.h" 23 #include "chrome/browser/profiles/profile.h"
(...skipping 27 matching lines...) Expand all
52 HistoryInfoMapEntry; 51 HistoryInfoMapEntry;
53 52
54 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; 53 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
55 54
56 // Score ranges used to get a 'base' score for each of the scoring factors 55 // Score ranges used to get a 'base' score for each of the scoring factors
57 // (such as recency of last visit, times visited, times the URL was typed, 56 // (such as recency of last visit, times visited, times the URL was typed,
58 // and the quality of the string match). There is a matching value range for 57 // and the quality of the string match). There is a matching value range for
59 // each of these scores for each factor. 58 // each of these scores for each factor.
60 const int kScoreRank[] = { 1425, 1200, 900, 400 }; 59 const int kScoreRank[] = { 1425, 1200, 900, 400 };
61 60
62 ScoredHistoryMatch::ScoredHistoryMatch()
63 : raw_score(0),
64 can_inline(false) {}
65
66 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)
67 : HistoryMatch(url_info, 0, false, false),
68 raw_score(0),
69 can_inline(false) {}
70
71 ScoredHistoryMatch::~ScoredHistoryMatch() {}
72
73 // Comparison function for sorting ScoredMatches by their scores.
74 bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
75 const ScoredHistoryMatch& m2) {
76 return m1.raw_score >= m2.raw_score;
77 }
78
79 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( 61 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(
80 const WordIDSet& word_id_set, 62 const WordIDSet& word_id_set,
81 const HistoryIDSet& history_id_set) 63 const HistoryIDSet& history_id_set)
82 : word_id_set_(word_id_set), 64 : word_id_set_(word_id_set),
83 history_id_set_(history_id_set), 65 history_id_set_(history_id_set),
84 used_(true) {} 66 used_(true) {}
85 67
86 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() 68 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem()
87 : used_(true) {} 69 : used_(true) {}
88 70
89 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} 71 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {}
90 72
91 // Comparison function for sorting TermMatches by their offsets.
92 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
93 return m1.offset < m2.offset;
94 }
95
96 // Comparison function for sorting search terms by descending length. 73 // Comparison function for sorting search terms by descending length.
97 bool LengthGreater(const string16& string_a, const string16& string_b) { 74 bool LengthGreater(const string16& string_a, const string16& string_b) {
98 return string_a.length() > string_b.length(); 75 return string_a.length() > string_b.length();
99 } 76 }
100 77
101 // std::accumulate helper function to add up TermMatches' lengths. 78 // std::accumulate helper function to add up TermMatches' lengths.
102 int AccumulateMatchLength(int total, const TermMatch& match) { 79 int AccumulateMatchLength(int total, const TermMatch& match) {
103 return total + match.length; 80 return total + match.length;
104 } 81 }
105 82
(...skipping 24 matching lines...) Expand all
130 if (i > 0) { 107 if (i > 0) {
131 score += (value - value_ranks[i]) * 108 score += (value - value_ranks[i]) *
132 (kScoreRank[i - 1] - kScoreRank[i]) / 109 (kScoreRank[i - 1] - kScoreRank[i]) /
133 (value_ranks[i - 1] - value_ranks[i]); 110 (value_ranks[i - 1] - value_ranks[i]);
134 } 111 }
135 return score; 112 return score;
136 } 113 }
137 114
138 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) 115 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
139 : history_dir_(history_dir), 116 : history_dir_(history_dir),
140 history_item_count_(0), 117 private_data_(new URLIndexPrivateData),
141 cached_at_shutdown_(false) { 118 cached_at_shutdown_(false) {
142 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); 119 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
143 } 120 }
144 121
145 // Called only by unit tests. 122 // Called only by unit tests.
146 InMemoryURLIndex::InMemoryURLIndex() 123 InMemoryURLIndex::InMemoryURLIndex()
147 : history_item_count_(0), 124 : private_data_(new URLIndexPrivateData),
148 cached_at_shutdown_(false) { 125 cached_at_shutdown_(false) {
149 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); 126 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
150 } 127 }
151 128
152 InMemoryURLIndex::~InMemoryURLIndex() { 129 InMemoryURLIndex::~InMemoryURLIndex() {
153 // If there was a history directory (which there won't be for some unit tests) 130 // If there was a history directory (which there won't be for some unit tests)
154 // then insure that the cache has already been saved. 131 // then insure that the cache has already been saved.
155 DCHECK(history_dir_.empty() || cached_at_shutdown_); 132 DCHECK(history_dir_.empty() || cached_at_shutdown_);
156 } 133 }
157 134
158 // static 135 // static
159 void InMemoryURLIndex::InitializeSchemeWhitelist( 136 void InMemoryURLIndex::InitializeSchemeWhitelist(
160 std::set<std::string>* whitelist) { 137 std::set<std::string>* whitelist) {
161 DCHECK(whitelist); 138 DCHECK(whitelist);
162 whitelist->insert(std::string(chrome::kAboutScheme)); 139 whitelist->insert(std::string(chrome::kAboutScheme));
163 whitelist->insert(std::string(chrome::kChromeUIScheme)); 140 whitelist->insert(std::string(chrome::kChromeUIScheme));
164 whitelist->insert(std::string(chrome::kFileScheme)); 141 whitelist->insert(std::string(chrome::kFileScheme));
165 whitelist->insert(std::string(chrome::kFtpScheme)); 142 whitelist->insert(std::string(chrome::kFtpScheme));
166 whitelist->insert(std::string(chrome::kHttpScheme)); 143 whitelist->insert(std::string(chrome::kHttpScheme));
167 whitelist->insert(std::string(chrome::kHttpsScheme)); 144 whitelist->insert(std::string(chrome::kHttpsScheme));
168 whitelist->insert(std::string(chrome::kMailToScheme)); 145 whitelist->insert(std::string(chrome::kMailToScheme));
169 } 146 }
170 147
171 // Indexing 148 // Indexing
172 149
173 bool InMemoryURLIndex::Init(history::URLDatabase* history_db, 150 bool InMemoryURLIndex::Init(URLDatabase* history_db,
174 const std::string& languages) { 151 const std::string& languages) {
175 // TODO(mrossetti): Register for profile/language change notifications. 152 // TODO(mrossetti): Register for profile/language change notifications.
176 languages_ = languages; 153 languages_ = languages;
177 return ReloadFromHistory(history_db, false); 154 return ReloadFromHistory(history_db, false);
178 } 155 }
179 156
180 void InMemoryURLIndex::ShutDown() { 157 void InMemoryURLIndex::ShutDown() {
181 // Write our cache. 158 // Write our cache.
182 SaveToCacheFile(); 159 SaveToCacheFile();
183 cached_at_shutdown_ = true; 160 cached_at_shutdown_ = true;
184 } 161 }
185 162
186 bool InMemoryURLIndex::IndexRow(const URLRow& row) { 163 void InMemoryURLIndex::IndexRow(const URLRow& row) {
187 const GURL& gurl(row.url()); 164 const GURL& gurl(row.url());
188 165
189 // Index only URLs with a whitelisted scheme. 166 // Index only URLs with a whitelisted scheme.
190 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) 167 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))
191 return true; 168 return;
192 169
170 URLID row_id = row.id();
171 // Strip out username and password before saving and indexing.
193 string16 url(net::FormatUrl(gurl, languages_, 172 string16 url(net::FormatUrl(gurl, languages_,
194 net::kFormatUrlOmitUsernamePassword, 173 net::kFormatUrlOmitUsernamePassword,
195 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 174 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
196 NULL, NULL, NULL)); 175 NULL, NULL, NULL));
197 176
198 HistoryID history_id = static_cast<HistoryID>(row.id()); 177 HistoryID history_id = static_cast<HistoryID>(row_id);
199 DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); 178 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
200 179
201 // Add the row for quick lookup in the history info store. 180 // Add the row for quick lookup in the history info store.
202 URLRow new_row(GURL(url), row.id()); 181 URLRow new_row(GURL(url), row_id);
203 new_row.set_visit_count(row.visit_count()); 182 new_row.set_visit_count(row.visit_count());
204 new_row.set_typed_count(row.typed_count()); 183 new_row.set_typed_count(row.typed_count());
205 new_row.set_last_visit(row.last_visit()); 184 new_row.set_last_visit(row.last_visit());
206 new_row.set_title(row.title()); 185 new_row.set_title(row.title());
207 history_info_map_[history_id] = new_row; 186 private_data_->history_info_map_[history_id] = new_row;
208 187
188 // Index the words contained in the URL and title of the row.
189 AddRowWordsToIndex(new_row);
190 return;
191 }
192
193 void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) {
194 HistoryID history_id = static_cast<HistoryID>(row.id());
209 // Split URL into individual, unique words then add in the title words. 195 // Split URL into individual, unique words then add in the title words.
196 const GURL& gurl(row.url());
197 string16 url(net::FormatUrl(gurl, languages_,
198 net::kFormatUrlOmitUsernamePassword,
199 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
200 NULL, NULL, NULL));
210 url = base::i18n::ToLower(url); 201 url = base::i18n::ToLower(url);
211 String16Set url_words = WordSetFromString16(url); 202 String16Set url_words = String16SetFromString16(url);
212 String16Set title_words = WordSetFromString16(row.title()); 203 String16Set title_words = String16SetFromString16(row.title());
213 String16Set words; 204 String16Set words;
214 std::set_union(url_words.begin(), url_words.end(), 205 std::set_union(url_words.begin(), url_words.end(),
215 title_words.begin(), title_words.end(), 206 title_words.begin(), title_words.end(),
216 std::insert_iterator<String16Set>(words, words.begin())); 207 std::insert_iterator<String16Set>(words, words.begin()));
217 for (String16Set::iterator word_iter = words.begin(); 208 for (String16Set::iterator word_iter = words.begin();
218 word_iter != words.end(); ++word_iter) 209 word_iter != words.end(); ++word_iter)
219 AddWordToIndex(*word_iter, history_id); 210 AddWordToIndex(*word_iter, history_id);
220 211
221 ++history_item_count_; 212 search_term_cache_.clear(); // Invalidate the term cache.
222 return true; 213 }
214
215 void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) {
216 RemoveRowWordsFromIndex(row);
217 HistoryID history_id = static_cast<HistoryID>(row.id());
218 private_data_->history_info_map_.erase(history_id);
219 }
220
221 void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) {
222 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
223 // this row.
224 URLIndexPrivateData& private_data(*(private_data_.get()));
225 HistoryID history_id = static_cast<HistoryID>(row.id());
226 WordIDSet word_id_set = private_data.history_id_word_map_[history_id];
227 private_data.history_id_word_map_.erase(history_id);
228
229 // Reconcile any changes to word usage.
230 for (WordIDSet::iterator word_id_iter = word_id_set.begin();
231 word_id_iter != word_id_set.end(); ++word_id_iter) {
232 WordID word_id = *word_id_iter;
233 private_data.word_id_history_map_[word_id].erase(history_id);
234 if (!private_data.word_id_history_map_[word_id].empty())
235 continue; // The word is still in use.
236
237 // The word is no longer in use. Reconcile any changes to character usage.
238 string16 word = private_data.word_list_[word_id];
239 Char16Set characters = Char16SetFromString16(word);
240 for (Char16Set::iterator uni_char_iter = characters.begin();
241 uni_char_iter != characters.end(); ++uni_char_iter) {
242 char16 uni_char = *uni_char_iter;
243 private_data.char_word_map_[uni_char].erase(word_id);
244 if (private_data.char_word_map_[uni_char].empty())
245 private_data.char_word_map_.erase(uni_char); // No longer in use.
246 }
247
248 // Complete the removal of references to the word.
249 private_data.word_id_history_map_.erase(word_id);
250 private_data.word_map_.erase(word);
251 private_data.word_list_[word_id] = string16();
252 private_data.available_words_.insert(word_id);
253 }
223 } 254 }
224 255
225 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, 256 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
226 bool clear_cache) { 257 bool clear_cache) {
227 ClearPrivateData(); 258 ClearPrivateData();
228 259
229 if (!history_db) 260 if (!history_db)
230 return false; 261 return false;
231 262
232 if (clear_cache || !RestoreFromCacheFile()) { 263 if (clear_cache || !RestoreFromCacheFile()) {
233 base::TimeTicks beginning_time = base::TimeTicks::Now(); 264 base::TimeTicks beginning_time = base::TimeTicks::Now();
234 // The index has to be built from scratch. 265 // The index has to be built from scratch.
235 URLDatabase::URLEnumerator history_enum; 266 URLDatabase::URLEnumerator history_enum;
236 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 267 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
237 return false; 268 return false;
238 URLRow row; 269 URLRow row;
239 while (history_enum.GetNextURL(&row)) { 270 while (history_enum.GetNextURL(&row))
240 if (!IndexRow(row)) 271 IndexRow(row);
241 return false;
242 }
243 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", 272 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
244 base::TimeTicks::Now() - beginning_time); 273 base::TimeTicks::Now() - beginning_time);
245 SaveToCacheFile(); 274 SaveToCacheFile();
246 } 275 }
247 return true; 276 return true;
248 } 277 }
249 278
250 void InMemoryURLIndex::ClearPrivateData() { 279 void InMemoryURLIndex::ClearPrivateData() {
251 history_item_count_ = 0; 280 private_data_->Clear();
252 word_list_.clear();
253 word_map_.clear();
254 char_word_map_.clear();
255 word_id_history_map_.clear();
256 history_info_map_.clear();
257 search_term_cache_.clear(); 281 search_term_cache_.clear();
258 } 282 }
259 283
260 bool InMemoryURLIndex::RestoreFromCacheFile() { 284 bool InMemoryURLIndex::RestoreFromCacheFile() {
261 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. 285 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
262 // That is: ensure that the database has not been modified since the cache 286 // That is: ensure that the database has not been modified since the cache
263 // was last saved. DB file modification date is inadequate. There are no 287 // was last saved. DB file modification date is inadequate. There are no
264 // SQLite table checksums automatically stored. 288 // SQLite table checksums automatically stored.
265 // FIXME(mrossetti): Move File IO to another thread. 289 // FIXME(mrossetti): Move File IO to another thread.
266 base::ThreadRestrictions::ScopedAllowIO allow_io; 290 base::ThreadRestrictions::ScopedAllowIO allow_io;
(...skipping 15 matching lines...) Expand all
282 return false; 306 return false;
283 } 307 }
284 308
285 if (!RestorePrivateData(index_cache)) { 309 if (!RestorePrivateData(index_cache)) {
286 ClearPrivateData(); // Back to square one -- must build from scratch. 310 ClearPrivateData(); // Back to square one -- must build from scratch.
287 return false; 311 return false;
288 } 312 }
289 313
290 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", 314 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
291 base::TimeTicks::Now() - beginning_time); 315 base::TimeTicks::Now() - beginning_time);
292 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); 316 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
317 private_data_->history_id_word_map_.size());
293 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); 318 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
294 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); 319 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
295 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); 320 private_data_->word_map_.size());
321 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
322 private_data_->char_word_map_.size());
296 return true; 323 return true;
297 } 324 }
298 325
299 bool InMemoryURLIndex::SaveToCacheFile() { 326 bool InMemoryURLIndex::SaveToCacheFile() {
300 // TODO(mrossetti): Move File IO to another thread. 327 // TODO(mrossetti): Move File IO to another thread.
301 base::ThreadRestrictions::ScopedAllowIO allow_io; 328 base::ThreadRestrictions::ScopedAllowIO allow_io;
302 FilePath file_path; 329 FilePath file_path;
303 if (!GetCacheFilePath(&file_path)) 330 if (!GetCacheFilePath(&file_path))
304 return false; 331 return false;
305 332
(...skipping 14 matching lines...) Expand all
320 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", 347 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
321 base::TimeTicks::Now() - beginning_time); 348 base::TimeTicks::Now() - beginning_time);
322 return true; 349 return true;
323 } 350 }
324 351
325 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { 352 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {
326 // The row may or may not already be in our index. If it is not already 353 // The row may or may not already be in our index. If it is not already
327 // indexed and it qualifies then it gets indexed. If it is already 354 // indexed and it qualifies then it gets indexed. If it is already
328 // indexed and still qualifies then it gets updated, otherwise it 355 // indexed and still qualifies then it gets updated, otherwise it
329 // is deleted from the index. 356 // is deleted from the index.
330 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); 357 HistoryInfoMap::iterator row_pos =
331 if (row_pos == history_info_map_.end()) { 358 private_data_->history_info_map_.find(row_id);
359 if (row_pos == private_data_->history_info_map_.end()) {
332 // This new row should be indexed if it qualifies. 360 // This new row should be indexed if it qualifies.
333 if (RowQualifiesAsSignificant(row, base::Time())) 361 URLRow new_row(row);
334 IndexRow(row); 362 new_row.set_id(row_id);
363 if (RowQualifiesAsSignificant(new_row, base::Time()))
364 IndexRow(new_row);
335 } else if (RowQualifiesAsSignificant(row, base::Time())) { 365 } else if (RowQualifiesAsSignificant(row, base::Time())) {
336 // This indexed row still qualifies and will be re-indexed. 366 // This indexed row still qualifies and will be re-indexed.
337 // The url won't have changed but the title, visit count, etc. 367 // The url won't have changed but the title, visit count, etc.
338 // might have changed. 368 // might have changed.
339 URLRow& old_row = row_pos->second; 369 URLRow& updated_row = row_pos->second;
340 old_row.set_visit_count(row.visit_count()); 370 updated_row.set_visit_count(row.visit_count());
341 old_row.set_typed_count(row.typed_count()); 371 updated_row.set_typed_count(row.typed_count());
342 old_row.set_last_visit(row.last_visit()); 372 updated_row.set_last_visit(row.last_visit());
343 // TODO(mrossetti): When we start indexing the title the next line 373 // While the URL is guaranteed to remain stable, the title may have changed.
344 // will need attention. 374 // If so, then we need to update the index with the changed words.
345 old_row.set_title(row.title()); 375 if (updated_row.title() != row.title()) {
376 // Clear all words associated with this row and re-index both the
377 // URL and title.
378 RemoveRowWordsFromIndex(updated_row);
379 updated_row.set_title(row.title());
380 AddRowWordsToIndex(updated_row);
381 }
346 } else { 382 } else {
347 // This indexed row no longer qualifies and will be de-indexed. 383 // This indexed row no longer qualifies and will be de-indexed by
348 history_info_map_.erase(row_id); 384 // clearing all words associated with this row.
385 URLRow& removed_row = row_pos->second;
386 RemoveRowFromIndex(removed_row);
349 } 387 }
350 // This invalidates the cache. 388 // This invalidates the cache.
351 search_term_cache_.clear(); 389 search_term_cache_.clear();
352 // TODO(mrossetti): Record this transaction in the cache.
353 } 390 }
354 391
355 void InMemoryURLIndex::DeleteURL(URLID row_id) { 392 void InMemoryURLIndex::DeleteURL(URLID row_id) {
356 // Note that this does not remove any reference to this row from the 393 // Note that this does not remove any reference to this row from the
357 // word_id_history_map_. That map will continue to contain (and return) 394 // word_id_history_map_. That map will continue to contain (and return)
358 // hits against this row until that map is rebuilt, but since the 395 // hits against this row until that map is rebuilt, but since the
359 // history_info_map_ no longer references the row no erroneous results 396 // history_info_map_ no longer references the row no erroneous results
360 // will propagate to the user. 397 // will propagate to the user.
361 history_info_map_.erase(row_id); 398 private_data_->history_info_map_.erase(row_id);
362 // This invalidates the word cache. 399 // This invalidates the word cache.
363 search_term_cache_.clear(); 400 search_term_cache_.clear();
364 // TODO(mrossetti): Record this transaction in the cache.
365 } 401 }
366 402
367 // Searching 403 // Searching
368 404
369 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( 405 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
370 const String16Vector& terms) { 406 const String16Vector& terms) {
371 ScoredHistoryMatches scored_items; 407 ScoredHistoryMatches scored_items;
408
409 // Do nothing if we have indexed no words (probably because we've not been
410 // initialized yet).
411 if (private_data_->word_list_.empty())
412 return scored_items;
413
372 if (!terms.empty()) { 414 if (!terms.empty()) {
373 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep 415 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
374 // approach. 416 // approach.
375 ResetSearchTermCache(); 417 ResetSearchTermCache();
376 418
377 // Lowercase the terms. 419 // Lowercase the terms.
378 // TODO(mrossetti): Another opportunity for a transform algorithm. 420 // TODO(mrossetti): Another opportunity for a transform algorithm.
379 String16Vector lower_terms; 421 String16Vector lower_terms;
380 for (String16Vector::const_iterator term_iter = terms.begin(); 422 for (String16Vector::const_iterator term_iter = terms.begin();
381 term_iter != terms.end(); ++term_iter) 423 term_iter != terms.end(); ++term_iter)
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
419 461
420 return scored_items; 462 return scored_items;
421 } 463 }
422 464
423 void InMemoryURLIndex::ResetSearchTermCache() { 465 void InMemoryURLIndex::ResetSearchTermCache() {
424 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 466 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();
425 iter != search_term_cache_.end(); ++iter) 467 iter != search_term_cache_.end(); ++iter)
426 iter->second.used_ = false; 468 iter->second.used_ = false;
427 } 469 }
428 470
429 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( 471 HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
430 const string16& uni_string) { 472 const string16& uni_string) {
431 // Break the terms down into individual terms (words), get the candidate 473 // Break the terms down into individual terms (words), get the candidate
432 // set for each term, and intersect each to get a final candidate list. 474 // set for each term, and intersect each to get a final candidate list.
433 // Note that a single 'term' from the user's perspective might be 475 // Note that a single 'term' from the user's perspective might be
434 // a string like "http://www.somewebsite.com" which, from our perspective, 476 // a string like "http://www.somewebsite.com" which, from our perspective,
435 // is four words: 'http', 'www', 'somewebsite', and 'com'. 477 // is four words: 'http', 'www', 'somewebsite', and 'com'.
436 HistoryIDSet history_id_set; 478 HistoryIDSet history_id_set;
437 String16Vector terms = WordVectorFromString16(uni_string, true); 479 String16Vector terms = String16VectorFromString16(uni_string, true);
438 // Sort the terms into the longest first as such are likely to narrow down 480 // Sort the terms into the longest first as such are likely to narrow down
439 // the results quicker. Also, single character terms are the most expensive 481 // the results quicker. Also, single character terms are the most expensive
440 // to process so save them for last. 482 // to process so save them for last.
441 std::sort(terms.begin(), terms.end(), LengthGreater); 483 std::sort(terms.begin(), terms.end(), LengthGreater);
442 for (String16Vector::iterator iter = terms.begin(); iter != terms.end(); 484 for (String16Vector::iterator iter = terms.begin(); iter != terms.end();
443 ++iter) { 485 ++iter) {
444 string16 uni_word = *iter; 486 string16 uni_word = *iter;
445 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); 487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);
446 if (term_history_set.empty()) { 488 if (term_history_set.empty()) {
447 history_id_set.clear(); 489 history_id_set.clear();
448 break; 490 break;
449 } 491 }
450 if (iter == terms.begin()) { 492 if (iter == terms.begin()) {
451 history_id_set.swap(term_history_set); 493 history_id_set.swap(term_history_set);
452 } else { 494 } else {
453 HistoryIDSet new_history_id_set; 495 HistoryIDSet new_history_id_set;
454 std::set_intersection(history_id_set.begin(), history_id_set.end(), 496 std::set_intersection(history_id_set.begin(), history_id_set.end(),
455 term_history_set.begin(), term_history_set.end(), 497 term_history_set.begin(), term_history_set.end(),
456 std::inserter(new_history_id_set, 498 std::inserter(new_history_id_set,
457 new_history_id_set.begin())); 499 new_history_id_set.begin()));
458 history_id_set.swap(new_history_id_set); 500 history_id_set.swap(new_history_id_set);
459 } 501 }
460 } 502 }
461 return history_id_set; 503 return history_id_set;
462 } 504 }
463 505
464 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( 506 HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
465 const string16& term) { 507 const string16& term) {
466 if (term.empty()) 508 if (term.empty())
467 return HistoryIDSet(); 509 return HistoryIDSet();
468 510
469 // TODO(mrossetti): Consider optimizing for very common terms such as 511 // TODO(mrossetti): Consider optimizing for very common terms such as
470 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently 512 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
471 // occuring words in the user's searches. 513 // occuring words in the user's searches.
472 514
473 size_t term_length = term.length(); 515 size_t term_length = term.length();
474 InMemoryURLIndex::WordIDSet word_id_set; 516 WordIDSet word_id_set;
475 if (term_length > 1) { 517 if (term_length > 1) {
476 // See if this term or a prefix thereof is present in the cache. 518 // See if this term or a prefix thereof is present in the cache.
477 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); 519 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());
478 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); 520 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
479 cache_iter != search_term_cache_.end(); ++cache_iter) { 521 cache_iter != search_term_cache_.end(); ++cache_iter) {
480 if (StartsWith(term, cache_iter->first, false) && 522 if (StartsWith(term, cache_iter->first, false) &&
481 (best_prefix == search_term_cache_.end() || 523 (best_prefix == search_term_cache_.end() ||
482 cache_iter->first.length() > best_prefix->first.length())) 524 cache_iter->first.length() > best_prefix->first.length()))
483 best_prefix = cache_iter; 525 best_prefix = cache_iter;
484 } 526 }
(...skipping 25 matching lines...) Expand all
510 552
511 // Filter for each remaining, unique character in the term. 553 // Filter for each remaining, unique character in the term.
512 Char16Set leftover_chars = Char16SetFromString16(leftovers); 554 Char16Set leftover_chars = Char16SetFromString16(leftovers);
513 Char16Set unique_chars; 555 Char16Set unique_chars;
514 std::set_difference(leftover_chars.begin(), leftover_chars.end(), 556 std::set_difference(leftover_chars.begin(), leftover_chars.end(),
515 prefix_chars.begin(), prefix_chars.end(), 557 prefix_chars.begin(), prefix_chars.end(),
516 std::inserter(unique_chars, unique_chars.begin())); 558 std::inserter(unique_chars, unique_chars.begin()));
517 559
518 // Reduce the word set with any leftover, unprocessed characters. 560 // Reduce the word set with any leftover, unprocessed characters.
519 if (!unique_chars.empty()) { 561 if (!unique_chars.empty()) {
520 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); 562 WordIDSet leftover_set(
563 private_data_->WordIDSetForTermChars(unique_chars));
521 // We might come up empty on the leftovers. 564 // We might come up empty on the leftovers.
522 if (leftover_set.empty()) { 565 if (leftover_set.empty()) {
523 search_term_cache_[term] = SearchTermCacheItem(); 566 search_term_cache_[term] = SearchTermCacheItem();
524 return HistoryIDSet(); 567 return HistoryIDSet();
525 } 568 }
526 // Or there may not have been a prefix from which to start. 569 // Or there may not have been a prefix from which to start.
527 if (prefix_chars.empty()) { 570 if (prefix_chars.empty()) {
528 word_id_set.swap(leftover_set); 571 word_id_set.swap(leftover_set);
529 } else { 572 } else {
530 WordIDSet new_word_id_set; 573 WordIDSet new_word_id_set;
531 std::set_intersection(word_id_set.begin(), word_id_set.end(), 574 std::set_intersection(word_id_set.begin(), word_id_set.end(),
532 leftover_set.begin(), leftover_set.end(), 575 leftover_set.begin(), leftover_set.end(),
533 std::inserter(new_word_id_set, 576 std::inserter(new_word_id_set,
534 new_word_id_set.begin())); 577 new_word_id_set.begin()));
535 word_id_set.swap(new_word_id_set); 578 word_id_set.swap(new_word_id_set);
536 } 579 }
537 } 580 }
538 581
539 // We must filter the word list because the resulting word set surely 582 // We must filter the word list because the resulting word set surely
540 // contains words which do not have the search term as a proper subset. 583 // contains words which do not have the search term as a proper subset.
541 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); 584 for (WordIDSet::iterator word_set_iter = word_id_set.begin();
542 word_set_iter != word_id_set.end(); ) { 585 word_set_iter != word_id_set.end(); ) {
543 if (word_list_[*word_set_iter].find(term) == string16::npos) 586 if (private_data_->word_list_[*word_set_iter].find(term) ==
587 string16::npos)
544 word_id_set.erase(word_set_iter++); 588 word_id_set.erase(word_set_iter++);
545 else 589 else
546 ++word_set_iter; 590 ++word_set_iter;
547 } 591 }
548 } else { 592 } else {
549 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); 593 word_id_set =
594 private_data_->WordIDSetForTermChars(Char16SetFromString16(term));
550 } 595 }
551 596
552 // If any words resulted then we can compose a set of history IDs by unioning 597 // If any words resulted then we can compose a set of history IDs by unioning
553 // the sets from each word. 598 // the sets from each word.
554 HistoryIDSet history_id_set; 599 HistoryIDSet history_id_set;
555 if (!word_id_set.empty()) { 600 if (!word_id_set.empty()) {
556 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 601 for (WordIDSet::iterator word_id_iter = word_id_set.begin();
557 word_id_iter != word_id_set.end(); ++word_id_iter) { 602 word_id_iter != word_id_set.end(); ++word_id_iter) {
558 WordID word_id = *word_id_iter; 603 WordID word_id = *word_id_iter;
559 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); 604 WordIDHistoryMap::iterator word_iter =
560 if (word_iter != word_id_history_map_.end()) { 605 private_data_->word_id_history_map_.find(word_id);
606 if (word_iter != private_data_->word_id_history_map_.end()) {
561 HistoryIDSet& word_history_id_set(word_iter->second); 607 HistoryIDSet& word_history_id_set(word_iter->second);
562 history_id_set.insert(word_history_id_set.begin(), 608 history_id_set.insert(word_history_id_set.begin(),
563 word_history_id_set.end()); 609 word_history_id_set.end());
564 } 610 }
565 } 611 }
566 } 612 }
567 613
568 // Record a new cache entry for this word if the term is longer than 614 // Record a new cache entry for this word if the term is longer than
569 // a single character. 615 // a single character.
570 if (term_length > 1) 616 if (term_length > 1)
571 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
572 618
573 return history_id_set; 619 return history_id_set;
574 } 620 }
575 621
576 // Utility Functions 622 // Utility Functions
577 623
578 // static
579 InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
580 const string16& uni_string) {
581 const size_t kMaxWordLength = 64;
582 String16Vector words = WordVectorFromString16(uni_string, false);
583 String16Set word_set;
584 for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
585 ++iter)
586 word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));
587 return word_set;
588 }
589
590 // static
591 InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
592 const string16& uni_string,
593 bool break_on_space) {
594 base::i18n::BreakIterator iter(
595 uni_string,
596 break_on_space ? base::i18n::BreakIterator::BREAK_SPACE
597 : base::i18n::BreakIterator::BREAK_WORD);
598 String16Vector words;
599 if (!iter.Init())
600 return words;
601 while (iter.Advance()) {
602 if (break_on_space || iter.IsWord()) {
603 string16 word = iter.GetString();
604 if (break_on_space)
605 TrimWhitespace(word, TRIM_ALL, &word);
606 if (!word.empty())
607 words.push_back(word);
608 }
609 }
610 return words;
611 }
612
613 // static
614 InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
615 const string16& term) {
616 Char16Set characters;
617 for (string16::const_iterator iter = term.begin(); iter != term.end();
618 ++iter)
619 characters.insert(*iter);
620 return characters;
621 }
622
623 void InMemoryURLIndex::AddWordToIndex(const string16& term, 624 void InMemoryURLIndex::AddWordToIndex(const string16& term,
624 HistoryID history_id) { 625 HistoryID history_id) {
625 WordMap::iterator word_pos = word_map_.find(term); 626 WordMap::iterator word_pos = private_data_->word_map_.find(term);
626 if (word_pos != word_map_.end()) 627 if (word_pos != private_data_->word_map_.end())
627 UpdateWordHistory(word_pos->second, history_id); 628 UpdateWordHistory(word_pos->second, history_id);
628 else 629 else
629 AddWordHistory(term, history_id); 630 AddWordHistory(term, history_id);
630 } 631 }
631 632
632 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { 633 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
633 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); 634 WordIDHistoryMap::iterator history_pos =
634 DCHECK(history_pos != word_id_history_map_.end()); 635 private_data_->word_id_history_map_.find(word_id);
635 HistoryIDSet& history_id_set(history_pos->second); 636 DCHECK(history_pos != private_data_->word_id_history_map_.end());
636 history_id_set.insert(history_id); 637 HistoryIDSet& history_id_set(history_pos->second);
638 history_id_set.insert(history_id);
639 private_data_->AddToHistoryIDWordMap(history_id, word_id);
637 } 640 }
638 641
639 // Add a new word to the word list and the word map, and then create a 642 // Add a new word to the word list and the word map, and then create a
640 // new entry in the word/history map. 643 // new entry in the word/history map.
641 void InMemoryURLIndex::AddWordHistory(const string16& term, 644 void InMemoryURLIndex::AddWordHistory(const string16& term,
642 HistoryID history_id) { 645 HistoryID history_id) {
643 word_list_.push_back(term); 646 URLIndexPrivateData& private_data(*(private_data_.get()));
644 WordID word_id = word_list_.size() - 1; 647 WordID word_id = private_data.word_list_.size();
645 word_map_[term] = word_id; 648 if (private_data.available_words_.empty()) {
649 private_data.word_list_.push_back(term);
650 } else {
651 word_id = *(private_data.available_words_.begin());
652 private_data.word_list_[word_id] = term;
653 private_data.available_words_.erase(word_id);
654 }
655 private_data.word_map_[term] = word_id;
656
646 HistoryIDSet history_id_set; 657 HistoryIDSet history_id_set;
647 history_id_set.insert(history_id); 658 history_id_set.insert(history_id);
648 word_id_history_map_[word_id] = history_id_set; 659 private_data.word_id_history_map_[word_id] = history_id_set;
660 private_data.AddToHistoryIDWordMap(history_id, word_id);
661
649 // For each character in the newly added word (i.e. a word that is not 662 // For each character in the newly added word (i.e. a word that is not
650 // already in the word index), add the word to the character index. 663 // already in the word index), add the word to the character index.
651 Char16Set characters = Char16SetFromString16(term); 664 Char16Set characters = Char16SetFromString16(term);
652 for (Char16Set::iterator uni_char_iter = characters.begin(); 665 for (Char16Set::iterator uni_char_iter = characters.begin();
653 uni_char_iter != characters.end(); ++uni_char_iter) { 666 uni_char_iter != characters.end(); ++uni_char_iter) {
654 char16 uni_char = *uni_char_iter; 667 char16 uni_char = *uni_char_iter;
655 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); 668 CharWordIDMap::iterator char_iter =
656 if (char_iter != char_word_map_.end()) { 669 private_data.char_word_map_.find(uni_char);
670 if (char_iter != private_data.char_word_map_.end()) {
657 // Update existing entry in the char/word index. 671 // Update existing entry in the char/word index.
658 WordIDSet& word_id_set(char_iter->second); 672 WordIDSet& word_id_set(char_iter->second);
659 word_id_set.insert(word_id); 673 word_id_set.insert(word_id);
660 } else { 674 } else {
661 // Create a new entry in the char/word index. 675 // Create a new entry in the char/word index.
662 WordIDSet word_id_set; 676 WordIDSet word_id_set;
663 word_id_set.insert(word_id); 677 word_id_set.insert(word_id);
664 char_word_map_[uni_char] = word_id_set; 678 private_data.char_word_map_[uni_char] = word_id_set;
665 } 679 }
666 } 680 }
667 } 681 }
668 682
669 InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
670 const Char16Set& term_chars) {
671 WordIDSet word_id_set;
672 for (Char16Set::const_iterator c_iter = term_chars.begin();
673 c_iter != term_chars.end(); ++c_iter) {
674 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
675 if (char_iter == char_word_map_.end()) {
676 // A character was not found so there are no matching results: bail.
677 word_id_set.clear();
678 break;
679 }
680 WordIDSet& char_word_id_set(char_iter->second);
681 // It is possible for there to no longer be any words associated with
682 // a particular character. Give up in that case.
683 if (char_word_id_set.empty()) {
684 word_id_set.clear();
685 break;
686 }
687
688 if (c_iter == term_chars.begin()) {
689 // First character results becomes base set of results.
690 word_id_set = char_word_id_set;
691 } else {
692 // Subsequent character results get intersected in.
693 WordIDSet new_word_id_set;
694 std::set_intersection(word_id_set.begin(), word_id_set.end(),
695 char_word_id_set.begin(), char_word_id_set.end(),
696 std::inserter(new_word_id_set,
697 new_word_id_set.begin()));
698 word_id_set.swap(new_word_id_set);
699 }
700 }
701 return word_id_set;
702 }
703
704 // static 683 // static
705 TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, 684 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.
706 const string16& string,
707 int term_num) {
708 const size_t kMaxCompareLength = 2048;
709 const string16& short_string = (string.length() > kMaxCompareLength) ?
710 string.substr(0, kMaxCompareLength) : string;
711 TermMatches matches;
712 for (size_t location = short_string.find(term); location != string16::npos;
713 location = short_string.find(term, location + 1)) {
714 matches.push_back(TermMatch(term_num, location, term.length()));
715 }
716 return matches;
717 }
718
719 // static
720 TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
721 if (matches.empty())
722 return matches;
723 TermMatches sorted_matches = matches;
724 std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
725 TermMatches clean_matches;
726 TermMatch last_match = sorted_matches[0];
727 clean_matches.push_back(last_match);
728 for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;
729 iter != sorted_matches.end(); ++iter) {
730 if (iter->offset >= last_match.offset + last_match.length) {
731 last_match = *iter;
732 clean_matches.push_back(last_match);
733 }
734 }
735 return clean_matches;
736 }
737
738 // static
739 std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
740 const TermMatches& matches) {
741 std::vector<size_t> offsets;
742 for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
743 offsets.push_back(i->offset);
744 return offsets;
745 }
746
747 // static
748 TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
749 const TermMatches& matches,
750 const std::vector<size_t>& offsets) {
751 DCHECK_EQ(matches.size(), offsets.size());
752 TermMatches new_matches;
753 std::vector<size_t>::const_iterator offset_iter = offsets.begin();
754 for (TermMatches::const_iterator term_iter = matches.begin();
755 term_iter != matches.end(); ++term_iter, ++offset_iter) {
756 if (*offset_iter != string16::npos) {
757 TermMatch new_match(*term_iter);
758 new_match.offset = *offset_iter;
759 new_matches.push_back(new_match);
760 }
761 }
762 return new_matches;
763 }
764
765 // static
766 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( 685 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
767 const URLRow& row, 686 const URLRow& row,
768 const String16Vector& terms) { 687 const String16Vector& terms) {
769 ScoredHistoryMatch match(row); 688 ScoredHistoryMatch match(row);
770 GURL gurl = row.url(); 689 GURL gurl = row.url();
771 if (!gurl.is_valid()) 690 if (!gurl.is_valid())
772 return match; 691 return match;
773 692
774 // Figure out where each search term appears in the URL and/or page title 693 // Figure out where each search term appears in the URL and/or page title
775 // so that we can score as well as provide autocomplete highlighting. 694 // so that we can score as well as provide autocomplete highlighting.
776 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); 695 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec()));
777 string16 title = base::i18n::ToLower(row.title()); 696 string16 title = base::i18n::ToLower(row.title());
778 int term_num = 0; 697 int term_num = 0;
779 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); 698 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();
780 ++iter, ++term_num) { 699 ++iter, ++term_num) {
781 string16 term = *iter; 700 string16 term = *iter;
782 TermMatches url_term_matches = MatchTermInString(term, url, term_num); 701 TermMatches url_term_matches = MatchTermInString(term, url, term_num);
783 TermMatches title_term_matches = MatchTermInString(term, title, term_num); 702 TermMatches title_term_matches = MatchTermInString(term, title, term_num);
784 if (url_term_matches.empty() && title_term_matches.empty()) 703 if (url_term_matches.empty() && title_term_matches.empty())
785 return match; // A term was not found in either URL or title - reject. 704 return match; // A term was not found in either URL or title - reject.
786 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), 705 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(),
787 url_term_matches.end()); 706 url_term_matches.end());
788 match.title_matches.insert(match.title_matches.end(), 707 match.title_matches.insert(match.title_matches.end(),
789 title_term_matches.begin(), 708 title_term_matches.begin(),
790 title_term_matches.end()); 709 title_term_matches.end());
791 } 710 }
792 711
793 // Sort matches by offset and eliminate any which overlap. 712 // Sort matches by offset and eliminate any which overlap.
794 match.url_matches = SortAndDeoverlap(match.url_matches); 713 match.url_matches = SortAndDeoverlapMatches(match.url_matches);
795 match.title_matches = SortAndDeoverlap(match.title_matches); 714 match.title_matches = SortAndDeoverlapMatches(match.title_matches);
796 715
797 // We should not (currently) inline autocomplete a result unless both of the 716 // We should not (currently) inline autocomplete a result unless both of the
798 // following are true: 717 // following are true:
799 // * There is exactly one substring matches in the URL, and 718 // * There is exactly one substring matches in the URL, and
800 // * The one URL match starts at the beginning of the URL. 719 // * The one URL match starts at the beginning of the URL.
801 match.can_inline = 720 match.can_inline =
802 match.url_matches.size() == 1 && match.url_matches[0].offset == 0; 721 match.url_matches.size() == 1 && match.url_matches[0].offset == 0;
803 722
804 // Get partial scores based on term matching. Note that the score for 723 // Get partial scores based on term matching. Note that the score for
805 // each of the URL and title are adjusted by the fraction of the 724 // each of the URL and title are adjusted by the fraction of the
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
899 818
900 // Scale the sum of the three components above into a single score component 819 // Scale the sum of the three components above into a single score component
901 // on the same scale as that used in ScoredMatchForURL(). 820 // on the same scale as that used in ScoredMatchForURL().
902 return ScoreForValue(raw_score, kTermScoreLevel); 821 return ScoreForValue(raw_score, kTermScoreLevel);
903 } 822 }
904 823
905 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( 824 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
906 const InMemoryURLIndex& index, 825 const InMemoryURLIndex& index,
907 const String16Vector& lower_terms) 826 const String16Vector& lower_terms)
908 : index_(index), 827 : index_(index),
909 lower_terms_(lower_terms) { 828 lower_terms_(lower_terms) {}
910 }
911 829
912 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} 830 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
913 831
914 void InMemoryURLIndex::AddHistoryMatch::operator()( 832 void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) {
915 const InMemoryURLIndex::HistoryID history_id) {
916 HistoryInfoMap::const_iterator hist_pos = 833 HistoryInfoMap::const_iterator hist_pos =
917 index_.history_info_map_.find(history_id); 834 index_.private_data_->history_info_map_.find(history_id);
918 // Note that a history_id may be present in the word_id_history_map_ yet not 835 // Note that a history_id may be present in the word_id_history_map_ yet not
919 // be found in the history_info_map_. This occurs when an item has been 836 // be found in the history_info_map_. This occurs when an item has been
920 // deleted by the user or the item no longer qualifies as a quick result. 837 // deleted by the user or the item no longer qualifies as a quick result.
921 if (hist_pos != index_.history_info_map_.end()) { 838 if (hist_pos != index_.private_data_->history_info_map_.end()) {
922 const URLRow& hist_item = hist_pos->second; 839 const URLRow& hist_item = hist_pos->second;
923 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); 840 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));
924 if (match.raw_score > 0) 841 if (match.raw_score > 0)
925 scored_matches_.push_back(match); 842 scored_matches_.push_back(match);
926 } 843 }
927 } 844 }
928 845
929 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { 846 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) {
930 if (history_dir_.empty()) 847 if (history_dir_.empty())
931 return false; 848 return false;
932 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); 849 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));
933 return true; 850 return true;
934 } 851 }
935 852
936 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { 853 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const {
937 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); 854 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();
938 } 855 }
939 856
940 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { 857 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
941 DCHECK(cache); 858 DCHECK(cache);
942 cache->set_timestamp(base::Time::Now().ToInternalValue()); 859 cache->set_timestamp(base::Time::Now().ToInternalValue());
943 cache->set_history_item_count(history_item_count_); 860 // history_item_count_ is no longer used but rather than change the protobuf
861 // definition use a placeholder. This will go away with the switch to SQLite.
862 cache->set_history_item_count(0);
944 SaveWordList(cache); 863 SaveWordList(cache);
945 SaveWordMap(cache); 864 SaveWordMap(cache);
946 SaveCharWordMap(cache); 865 SaveCharWordMap(cache);
947 SaveWordIDHistoryMap(cache); 866 SaveWordIDHistoryMap(cache);
948 SaveHistoryInfoMap(cache); 867 SaveHistoryInfoMap(cache);
949 } 868 }
950 869
951 bool InMemoryURLIndex::RestorePrivateData( 870 bool InMemoryURLIndex::RestorePrivateData(
952 const InMemoryURLIndexCacheItem& cache) { 871 const InMemoryURLIndexCacheItem& cache) {
953 last_saved_ = base::Time::FromInternalValue(cache.timestamp()); 872 last_saved_ = base::Time::FromInternalValue(cache.timestamp());
954 history_item_count_ = cache.history_item_count(); 873 return RestoreWordList(cache) && RestoreWordMap(cache) &&
955 return (history_item_count_ == 0) || (RestoreWordList(cache) && 874 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
956 RestoreWordMap(cache) && RestoreCharWordMap(cache) && 875 RestoreHistoryInfoMap(cache);
957 RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));
958 } 876 }
959 877
960
961 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { 878 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
962 if (word_list_.empty()) 879 if (private_data_->word_list_.empty())
963 return; 880 return;
964 WordListItem* list_item = cache->mutable_word_list(); 881 WordListItem* list_item = cache->mutable_word_list();
965 list_item->set_word_count(word_list_.size()); 882 list_item->set_word_count(private_data_->word_list_.size());
966 for (String16Vector::const_iterator iter = word_list_.begin(); 883 for (String16Vector::const_iterator iter = private_data_->word_list_.begin();
967 iter != word_list_.end(); ++iter) 884 iter != private_data_->word_list_.end(); ++iter)
968 list_item->add_word(UTF16ToUTF8(*iter)); 885 list_item->add_word(UTF16ToUTF8(*iter));
969 } 886 }
970 887
971 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { 888 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {
972 if (!cache.has_word_list()) 889 if (!cache.has_word_list())
973 return false; 890 return false;
974 const WordListItem& list_item(cache.word_list()); 891 const WordListItem& list_item(cache.word_list());
975 uint32 expected_item_count = list_item.word_count(); 892 uint32 expected_item_count = list_item.word_count();
976 uint32 actual_item_count = list_item.word_size(); 893 uint32 actual_item_count = list_item.word_size();
977 if (actual_item_count == 0 || actual_item_count != expected_item_count) 894 if (actual_item_count == 0 || actual_item_count != expected_item_count)
978 return false; 895 return false;
979 const RepeatedPtrField<std::string>& words(list_item.word()); 896 const RepeatedPtrField<std::string>& words(list_item.word());
980 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); 897 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
981 iter != words.end(); ++iter) 898 iter != words.end(); ++iter)
982 word_list_.push_back(UTF8ToUTF16(*iter)); 899 private_data_->word_list_.push_back(UTF8ToUTF16(*iter));
983 return true; 900 return true;
984 } 901 }
985 902
986 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { 903 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
987 if (word_map_.empty()) 904 if (private_data_->word_map_.empty())
988 return; 905 return;
989 WordMapItem* map_item = cache->mutable_word_map(); 906 WordMapItem* map_item = cache->mutable_word_map();
990 map_item->set_item_count(word_map_.size()); 907 map_item->set_item_count(private_data_->word_map_.size());
991 for (WordMap::const_iterator iter = word_map_.begin(); 908 for (WordMap::const_iterator iter = private_data_->word_map_.begin();
992 iter != word_map_.end(); ++iter) { 909 iter != private_data_->word_map_.end(); ++iter) {
993 WordMapEntry* map_entry = map_item->add_word_map_entry(); 910 WordMapEntry* map_entry = map_item->add_word_map_entry();
994 map_entry->set_word(UTF16ToUTF8(iter->first)); 911 map_entry->set_word(UTF16ToUTF8(iter->first));
995 map_entry->set_word_id(iter->second); 912 map_entry->set_word_id(iter->second);
996 } 913 }
997 } 914 }
998 915
999 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { 916 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {
1000 if (!cache.has_word_map()) 917 if (!cache.has_word_map())
1001 return false; 918 return false;
1002 const WordMapItem& list_item(cache.word_map()); 919 const WordMapItem& list_item(cache.word_map());
1003 uint32 expected_item_count = list_item.item_count(); 920 uint32 expected_item_count = list_item.item_count();
1004 uint32 actual_item_count = list_item.word_map_entry_size(); 921 uint32 actual_item_count = list_item.word_map_entry_size();
1005 if (actual_item_count == 0 || actual_item_count != expected_item_count) 922 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1006 return false; 923 return false;
1007 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); 924 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
1008 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); 925 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
1009 iter != entries.end(); ++iter) 926 iter != entries.end(); ++iter)
1010 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); 927 private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
1011 return true; 928 return true;
1012 } 929 }
1013 930
1014 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { 931 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {
1015 if (char_word_map_.empty()) 932 if (private_data_->char_word_map_.empty())
1016 return; 933 return;
1017 CharWordMapItem* map_item = cache->mutable_char_word_map(); 934 CharWordMapItem* map_item = cache->mutable_char_word_map();
1018 map_item->set_item_count(char_word_map_.size()); 935 map_item->set_item_count(private_data_->char_word_map_.size());
1019 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); 936 for (CharWordIDMap::const_iterator iter =
1020 iter != char_word_map_.end(); ++iter) { 937 private_data_->char_word_map_.begin();
938 iter != private_data_->char_word_map_.end(); ++iter) {
1021 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); 939 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
1022 map_entry->set_char_16(iter->first); 940 map_entry->set_char_16(iter->first);
1023 const WordIDSet& word_id_set(iter->second); 941 const WordIDSet& word_id_set(iter->second);
1024 map_entry->set_item_count(word_id_set.size()); 942 map_entry->set_item_count(word_id_set.size());
1025 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); 943 for (WordIDSet::const_iterator set_iter = word_id_set.begin();
1026 set_iter != word_id_set.end(); ++set_iter) 944 set_iter != word_id_set.end(); ++set_iter)
1027 map_entry->add_word_id(*set_iter); 945 map_entry->add_word_id(*set_iter);
1028 } 946 }
1029 } 947 }
1030 948
(...skipping 13 matching lines...) Expand all
1044 expected_item_count = iter->item_count(); 962 expected_item_count = iter->item_count();
1045 actual_item_count = iter->word_id_size(); 963 actual_item_count = iter->word_id_size();
1046 if (actual_item_count == 0 || actual_item_count != expected_item_count) 964 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1047 return false; 965 return false;
1048 char16 uni_char = static_cast<char16>(iter->char_16()); 966 char16 uni_char = static_cast<char16>(iter->char_16());
1049 WordIDSet word_id_set; 967 WordIDSet word_id_set;
1050 const RepeatedField<int32>& word_ids(iter->word_id()); 968 const RepeatedField<int32>& word_ids(iter->word_id());
1051 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); 969 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
1052 jiter != word_ids.end(); ++jiter) 970 jiter != word_ids.end(); ++jiter)
1053 word_id_set.insert(*jiter); 971 word_id_set.insert(*jiter);
1054 char_word_map_[uni_char] = word_id_set; 972 private_data_->char_word_map_[uni_char] = word_id_set;
1055 } 973 }
1056 return true; 974 return true;
1057 } 975 }
1058 976
1059 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) 977 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)
1060 const { 978 const {
1061 if (word_id_history_map_.empty()) 979 if (private_data_->word_id_history_map_.empty())
1062 return; 980 return;
1063 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); 981 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
1064 map_item->set_item_count(word_id_history_map_.size()); 982 map_item->set_item_count(private_data_->word_id_history_map_.size());
1065 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); 983 for (WordIDHistoryMap::const_iterator iter =
1066 iter != word_id_history_map_.end(); ++iter) { 984 private_data_->word_id_history_map_.begin();
985 iter != private_data_->word_id_history_map_.end(); ++iter) {
1067 WordIDHistoryMapEntry* map_entry = 986 WordIDHistoryMapEntry* map_entry =
1068 map_item->add_word_id_history_map_entry(); 987 map_item->add_word_id_history_map_entry();
1069 map_entry->set_word_id(iter->first); 988 map_entry->set_word_id(iter->first);
1070 const HistoryIDSet& history_id_set(iter->second); 989 const HistoryIDSet& history_id_set(iter->second);
1071 map_entry->set_item_count(history_id_set.size()); 990 map_entry->set_item_count(history_id_set.size());
1072 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); 991 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
1073 set_iter != history_id_set.end(); ++set_iter) 992 set_iter != history_id_set.end(); ++set_iter)
1074 map_entry->add_history_id(*set_iter); 993 map_entry->add_history_id(*set_iter);
1075 } 994 }
1076 } 995 }
(...skipping 12 matching lines...) Expand all
1089 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = 1008 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
1090 entries.begin(); iter != entries.end(); ++iter) { 1009 entries.begin(); iter != entries.end(); ++iter) {
1091 expected_item_count = iter->item_count(); 1010 expected_item_count = iter->item_count();
1092 actual_item_count = iter->history_id_size(); 1011 actual_item_count = iter->history_id_size();
1093 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1012 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1094 return false; 1013 return false;
1095 WordID word_id = iter->word_id(); 1014 WordID word_id = iter->word_id();
1096 HistoryIDSet history_id_set; 1015 HistoryIDSet history_id_set;
1097 const RepeatedField<int64>& history_ids(iter->history_id()); 1016 const RepeatedField<int64>& history_ids(iter->history_id());
1098 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); 1017 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
1099 jiter != history_ids.end(); ++jiter) 1018 jiter != history_ids.end(); ++jiter) {
1100 history_id_set.insert(*jiter); 1019 history_id_set.insert(*jiter);
1101 word_id_history_map_[word_id] = history_id_set; 1020 private_data_->AddToHistoryIDWordMap(*jiter, word_id);
1021 }
1022 private_data_->word_id_history_map_[word_id] = history_id_set;
1102 } 1023 }
1103 return true; 1024 return true;
1104 } 1025 }
1105 1026
1106 void InMemoryURLIndex::SaveHistoryInfoMap( 1027 void InMemoryURLIndex::SaveHistoryInfoMap(
1107 InMemoryURLIndexCacheItem* cache) const { 1028 InMemoryURLIndexCacheItem* cache) const {
1108 if (history_info_map_.empty()) 1029 if (private_data_->history_info_map_.empty())
1109 return; 1030 return;
1110 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 1031 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1111 map_item->set_item_count(history_info_map_.size()); 1032 map_item->set_item_count(private_data_->history_info_map_.size());
1112 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1033 for (HistoryInfoMap::const_iterator iter =
1113 iter != history_info_map_.end(); ++iter) { 1034 private_data_->history_info_map_.begin();
1035 iter != private_data_->history_info_map_.end(); ++iter) {
1114 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 1036 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1115 map_entry->set_history_id(iter->first); 1037 map_entry->set_history_id(iter->first);
1116 const URLRow& url_row(iter->second); 1038 const URLRow& url_row(iter->second);
1117 // Note: We only save information that contributes to the index so there 1039 // Note: We only save information that contributes to the index so there
1118 // is no need to save search_term_cache_ (not persistent), 1040 // is no need to save search_term_cache_ (not persistent),
1119 // languages_, etc. 1041 // languages_, etc.
1120 map_entry->set_visit_count(url_row.visit_count()); 1042 map_entry->set_visit_count(url_row.visit_count());
1121 map_entry->set_typed_count(url_row.typed_count()); 1043 map_entry->set_typed_count(url_row.typed_count());
1122 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 1044 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1123 map_entry->set_url(url_row.url().spec()); 1045 map_entry->set_url(url_row.url().spec());
(...skipping 17 matching lines...) Expand all
1141 HistoryID history_id = iter->history_id(); 1063 HistoryID history_id = iter->history_id();
1142 GURL url(iter->url()); 1064 GURL url(iter->url());
1143 URLRow url_row(url, history_id); 1065 URLRow url_row(url, history_id);
1144 url_row.set_visit_count(iter->visit_count()); 1066 url_row.set_visit_count(iter->visit_count());
1145 url_row.set_typed_count(iter->typed_count()); 1067 url_row.set_typed_count(iter->typed_count());
1146 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); 1068 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
1147 if (iter->has_title()) { 1069 if (iter->has_title()) {
1148 string16 title(UTF8ToUTF16(iter->title())); 1070 string16 title(UTF8ToUTF16(iter->title()));
1149 url_row.set_title(title); 1071 url_row.set_title(title);
1150 } 1072 }
1151 history_info_map_[history_id] = url_row; 1073 private_data_->history_info_map_[history_id] = url_row;
1152 } 1074 }
1153 return true; 1075 return true;
1154 } 1076 }
1155 1077
1156 } // namespace history 1078 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/in_memory_url_index.h ('k') | chrome/browser/history/in_memory_url_index_types.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698