Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(400)

Side by Side Diff: chrome/browser/history/in_memory_url_index.cc

Issue 8120004: HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/in_memory_url_index.h" 5 #include "chrome/browser/history/in_memory_url_index.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 12
13 #include "base/file_util.h" 13 #include "base/file_util.h"
14 #include "base/i18n/break_iterator.h"
15 #include "base/i18n/case_conversion.h" 14 #include "base/i18n/case_conversion.h"
16 #include "base/metrics/histogram.h" 15 #include "base/metrics/histogram.h"
17 #include "base/string_util.h"
18 #include "base/threading/thread_restrictions.h" 16 #include "base/threading/thread_restrictions.h"
19 #include "base/time.h" 17 #include "base/time.h"
20 #include "base/utf_string_conversions.h" 18 #include "base/utf_string_conversions.h"
21 #include "chrome/browser/autocomplete/autocomplete.h" 19 #include "chrome/browser/autocomplete/autocomplete.h"
22 #include "chrome/browser/autocomplete/history_provider_util.h" 20 #include "chrome/browser/autocomplete/history_provider_util.h"
21 #include "chrome/browser/history/history_notifications.h"
23 #include "chrome/browser/history/url_database.h" 22 #include "chrome/browser/history/url_database.h"
24 #include "chrome/browser/profiles/profile.h" 23 #include "chrome/browser/profiles/profile.h"
24 #include "chrome/common/chrome_notification_types.h"
25 #include "chrome/common/url_constants.h" 25 #include "chrome/common/url_constants.h"
26 #include "content/common/notification_details.h"
27 #include "content/common/notification_source.h"
26 #include "googleurl/src/url_parse.h" 28 #include "googleurl/src/url_parse.h"
27 #include "googleurl/src/url_util.h" 29 #include "googleurl/src/url_util.h"
28 #include "net/base/escape.h" 30 #include "net/base/escape.h"
29 #include "net/base/net_util.h" 31 #include "net/base/net_util.h"
30 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" 32 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
31 #include "ui/base/l10n/l10n_util.h" 33 #include "ui/base/l10n/l10n_util.h"
32 34
33 using google::protobuf::RepeatedField; 35 using google::protobuf::RepeatedField;
34 using google::protobuf::RepeatedPtrField; 36 using google::protobuf::RepeatedPtrField;
35 using in_memory_url_index::InMemoryURLIndexCacheItem; 37 using in_memory_url_index::InMemoryURLIndexCacheItem;
(...skipping 16 matching lines...) Expand all
52 HistoryInfoMapEntry; 54 HistoryInfoMapEntry;
53 55
54 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; 56 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
55 57
56 // Score ranges used to get a 'base' score for each of the scoring factors 58 // Score ranges used to get a 'base' score for each of the scoring factors
57 // (such as recency of last visit, times visited, times the URL was typed, 59 // (such as recency of last visit, times visited, times the URL was typed,
58 // and the quality of the string match). There is a matching value range for 60 // and the quality of the string match). There is a matching value range for
59 // each of these scores for each factor. 61 // each of these scores for each factor.
60 const int kScoreRank[] = { 1425, 1200, 900, 400 }; 62 const int kScoreRank[] = { 1425, 1200, 900, 400 };
61 63
62 ScoredHistoryMatch::ScoredHistoryMatch()
63 : raw_score(0),
64 can_inline(false) {}
65
66 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)
67 : HistoryMatch(url_info, 0, false, false),
68 raw_score(0),
69 can_inline(false) {}
70
71 ScoredHistoryMatch::~ScoredHistoryMatch() {}
72
73 // Comparison function for sorting ScoredMatches by their scores.
74 bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
75 const ScoredHistoryMatch& m2) {
76 return m1.raw_score >= m2.raw_score;
77 }
78
79 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( 64 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(
80 const WordIDSet& word_id_set, 65 const WordIDSet& word_id_set,
81 const HistoryIDSet& history_id_set) 66 const HistoryIDSet& history_id_set)
82 : word_id_set_(word_id_set), 67 : word_id_set_(word_id_set),
83 history_id_set_(history_id_set), 68 history_id_set_(history_id_set),
84 used_(true) {} 69 used_(true) {}
85 70
86 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() 71 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem()
87 : used_(true) {} 72 : used_(true) {}
88 73
89 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} 74 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {}
90 75
91 // Comparison function for sorting TermMatches by their offsets.
92 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
93 return m1.offset < m2.offset;
94 }
95
96 // Comparison function for sorting search terms by descending length. 76 // Comparison function for sorting search terms by descending length.
97 bool LengthGreater(const string16& string_a, const string16& string_b) { 77 bool LengthGreater(const string16& string_a, const string16& string_b) {
98 return string_a.length() > string_b.length(); 78 return string_a.length() > string_b.length();
99 } 79 }
100 80
101 // std::accumulate helper function to add up TermMatches' lengths. 81 // std::accumulate helper function to add up TermMatches' lengths.
102 int AccumulateMatchLength(int total, const TermMatch& match) { 82 int AccumulateMatchLength(int total, const TermMatch& match) {
103 return total + match.length; 83 return total + match.length;
104 } 84 }
105 85
(...skipping 22 matching lines...) Expand all
128 return 0; 108 return 0;
129 int score = kScoreRank[i]; 109 int score = kScoreRank[i];
130 if (i > 0) { 110 if (i > 0) {
131 score += (value - value_ranks[i]) * 111 score += (value - value_ranks[i]) *
132 (kScoreRank[i - 1] - kScoreRank[i]) / 112 (kScoreRank[i - 1] - kScoreRank[i]) /
133 (value_ranks[i - 1] - value_ranks[i]); 113 (value_ranks[i - 1] - value_ranks[i]);
134 } 114 }
135 return score; 115 return score;
136 } 116 }
137 117
138 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) 118 InMemoryURLIndex::InMemoryURLIndex(Profile* profile,
119 const FilePath& history_dir)
139 : history_dir_(history_dir), 120 : history_dir_(history_dir),
140 history_item_count_(0) { 121 private_data_(new URLIndexPrivateData) {
141 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); 122 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
123 if (profile) {
124 Source<Profile> source(profile);
125 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source);
126 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED,
127 source);
128 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source);
129 }
142 } 130 }
143 131
144 // Called only by unit tests. 132 // Called only by unit tests.
145 InMemoryURLIndex::InMemoryURLIndex() 133 InMemoryURLIndex::InMemoryURLIndex()
146 : history_item_count_(0) { 134 : private_data_(new URLIndexPrivateData) {
147 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); 135 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
148 } 136 }
149 137
150 InMemoryURLIndex::~InMemoryURLIndex() {} 138 InMemoryURLIndex::~InMemoryURLIndex() {}
151 139
152 // static 140 // static
153 void InMemoryURLIndex::InitializeSchemeWhitelist( 141 void InMemoryURLIndex::InitializeSchemeWhitelist(
154 std::set<std::string>* whitelist) { 142 std::set<std::string>* whitelist) {
155 DCHECK(whitelist); 143 DCHECK(whitelist);
156 whitelist->insert(std::string(chrome::kAboutScheme)); 144 whitelist->insert(std::string(chrome::kAboutScheme));
157 whitelist->insert(std::string(chrome::kChromeUIScheme)); 145 whitelist->insert(std::string(chrome::kChromeUIScheme));
158 whitelist->insert(std::string(chrome::kFileScheme)); 146 whitelist->insert(std::string(chrome::kFileScheme));
159 whitelist->insert(std::string(chrome::kFtpScheme)); 147 whitelist->insert(std::string(chrome::kFtpScheme));
160 whitelist->insert(std::string(chrome::kHttpScheme)); 148 whitelist->insert(std::string(chrome::kHttpScheme));
161 whitelist->insert(std::string(chrome::kHttpsScheme)); 149 whitelist->insert(std::string(chrome::kHttpsScheme));
162 whitelist->insert(std::string(chrome::kMailToScheme)); 150 whitelist->insert(std::string(chrome::kMailToScheme));
163 } 151 }
164 152
165 // Indexing 153 // Indexing
166 154
167 bool InMemoryURLIndex::Init(history::URLDatabase* history_db, 155 bool InMemoryURLIndex::Init(URLDatabase* history_db,
168 const std::string& languages) { 156 const std::string& languages) {
169 // TODO(mrossetti): Register for profile/language change notifications. 157 // TODO(mrossetti): Register for profile/language change notifications.
170 languages_ = languages; 158 languages_ = languages;
171 return ReloadFromHistory(history_db, false); 159 return ReloadFromHistory(history_db, false);
172 } 160 }
173 161
174 void InMemoryURLIndex::ShutDown() { 162 void InMemoryURLIndex::ShutDown() {
163 registrar_.RemoveAll();
175 // Write our cache. 164 // Write our cache.
176 SaveToCacheFile(); 165 SaveToCacheFile();
177 } 166 }
178 167
168 void InMemoryURLIndex::Observe(int type,
169 const NotificationSource& source,
170 const NotificationDetails& details) {
171 switch (type) {
172 case chrome::NOTIFICATION_HISTORY_URL_VISITED:
173 OnURLVisited(Details<URLVisitedDetails>(details).ptr());
174 break;
175 case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED:
176 OnURLsModified(Details<history::URLsModifiedDetails>(details).ptr());
177 break;
178 case chrome::NOTIFICATION_HISTORY_URLS_DELETED:
179 OnURLsDeleted(Details<history::URLsDeletedDetails>(details).ptr());
180 break;
181 default:
182 // For simplicity, the unit tests send us all notifications, even when
183 // we haven't registered for them, so don't assert here.
184 break;
185 }
186 }
187
188 void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) {
189 UpdateURL(details->row);
190 }
191
192 void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) {
193 for (std::vector<history::URLRow>::const_iterator row =
194 details->changed_urls.begin();
195 row != details->changed_urls.end(); ++row)
196 UpdateURL(*row);
197 }
198
199 void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) {
200 if (details->all_history) {
201 ClearPrivateData();
202 } else {
203 for (std::vector<URLRow>::const_iterator row = details->rows.begin();
204 row != details->rows.end(); ++row)
205 DeleteURL(*row);
206 }
207 }
208
179 bool InMemoryURLIndex::IndexRow(const URLRow& row) { 209 bool InMemoryURLIndex::IndexRow(const URLRow& row) {
180 const GURL& gurl(row.url()); 210 const GURL& gurl(row.url());
181 211
182 // Index only URLs with a whitelisted scheme. 212 // Index only URLs with a whitelisted scheme.
183 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) 213 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))
184 return true; 214 return true;
185 215
216 URLID row_id = row.id();
217 // Strip out username and password before saving and indexing.
186 string16 url(net::FormatUrl(gurl, languages_, 218 string16 url(net::FormatUrl(gurl, languages_,
187 net::kFormatUrlOmitUsernamePassword, 219 net::kFormatUrlOmitUsernamePassword,
188 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 220 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
189 NULL, NULL, NULL)); 221 NULL, NULL, NULL));
190 222 HistoryID history_id = static_cast<HistoryID>(row_id);
191 HistoryID history_id = static_cast<HistoryID>(row.id()); 223 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
192 DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max());
193 224
194 // Add the row for quick lookup in the history info store. 225 // Add the row for quick lookup in the history info store.
195 URLRow new_row(GURL(url), row.id()); 226 URLRow new_row(GURL(url), row_id);
196 new_row.set_visit_count(row.visit_count()); 227 new_row.set_visit_count(row.visit_count());
197 new_row.set_typed_count(row.typed_count()); 228 new_row.set_typed_count(row.typed_count());
198 new_row.set_last_visit(row.last_visit()); 229 new_row.set_last_visit(row.last_visit());
199 new_row.set_title(row.title()); 230 new_row.set_title(row.title());
200 history_info_map_[history_id] = new_row; 231 private_data_->history_info_map_[history_id] = new_row;
201 232
233 // Index the words contained in the URL and title of the row.
234 AddRowWordsToIndex(new_row);
235 ++(private_data_->history_item_count_);
236 return true;
237 }
238
239 void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) {
240 HistoryID history_id = static_cast<HistoryID>(row.id());
202 // Split URL into individual, unique words then add in the title words. 241 // Split URL into individual, unique words then add in the title words.
242 const GURL& gurl(row.url());
243 string16 url(net::FormatUrl(gurl, languages_,
244 net::kFormatUrlOmitUsernamePassword,
245 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
246 NULL, NULL, NULL));
203 url = base::i18n::ToLower(url); 247 url = base::i18n::ToLower(url);
204 String16Set url_words = WordSetFromString16(url); 248 String16Set url_words = String16SetFromString16(url);
205 String16Set title_words = WordSetFromString16(row.title()); 249 String16Set title_words = String16SetFromString16(row.title());
206 String16Set words; 250 String16Set words;
207 std::set_union(url_words.begin(), url_words.end(), 251 std::set_union(url_words.begin(), url_words.end(),
208 title_words.begin(), title_words.end(), 252 title_words.begin(), title_words.end(),
209 std::insert_iterator<String16Set>(words, words.begin())); 253 std::insert_iterator<String16Set>(words, words.begin()));
210 for (String16Set::iterator word_iter = words.begin(); 254 for (String16Set::iterator word_iter = words.begin();
211 word_iter != words.end(); ++word_iter) 255 word_iter != words.end(); ++word_iter)
212 AddWordToIndex(*word_iter, history_id); 256 AddWordToIndex(*word_iter, history_id);
213 257
214 ++history_item_count_; 258 search_term_cache_.clear(); // Invalidate the term cache.
215 return true; 259 }
260
261 void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) {
262 RemoveRowWordsFromIndex(row);
263 HistoryID history_id = static_cast<HistoryID>(row.id());
264 private_data_->history_info_map_.erase(history_id);
265 }
266
267 void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) {
268 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
269 // this row.
270 URLIndexPrivateData& private_data(*(private_data_.get()));
271 HistoryID history_id = static_cast<HistoryID>(row.id());
272 WordIDSet word_id_set = private_data.history_id_word_map_[history_id];
273 private_data.history_id_word_map_.erase(history_id);
274
275 // Reconcile any changes to word usage.
276 for (WordIDSet::iterator word_id_iter = word_id_set.begin();
277 word_id_iter != word_id_set.end(); ++word_id_iter) {
278 WordID word_id = *word_id_iter;
279 HistoryIDSet history_ids = private_data.word_id_history_map_[word_id];
Peter Kasting 2011/10/07 17:50:37 Why do you copy this out here, modify the copy, an
mrossetti 2011/10/07 22:24:37 Yeah, great catch! There used to be some more code
280 history_ids.erase(history_id);
281 if (!history_ids.empty()) {
282 // The word is still in use.
283 private_data.word_id_history_map_[word_id] = history_ids;
284 continue;
285 }
286
287 // The word is no longer in use. Reconcile any changes to character usage.
288 string16 word = private_data.word_list_[word_id];
289 Char16Set characters = Char16SetFromString16(word);
290 for (Char16Set::iterator uni_char_iter = characters.begin();
291 uni_char_iter != characters.end(); ++uni_char_iter) {
292 char16 uni_char = *uni_char_iter;
293 WordIDSet word_ids = private_data.char_word_map_[uni_char];
Peter Kasting 2011/10/07 17:50:37 Same comment.
mrossetti 2011/10/07 22:24:37 Done.
294 word_ids.erase(word_id);
295 if (!word_ids.empty()) {
296 // The character is still in use.
297 private_data.char_word_map_[uni_char] = word_ids;
298 continue;
299 }
300 // The character is no longer in use.
301 private_data.char_word_map_.erase(uni_char);
302 }
303
304 // Complete the removal of references to the word.
305 private_data.word_id_history_map_.erase(word_id);
306 private_data.word_map_.erase(word);
307 private_data.word_list_[word_id] = string16();
308 private_data.available_words_.insert(word_id);
309 }
216 } 310 }
217 311
218 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, 312 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
219 bool clear_cache) { 313 bool clear_cache) {
220 ClearPrivateData(); 314 ClearPrivateData();
221 315
222 if (!history_db) 316 if (!history_db)
223 return false; 317 return false;
224 318
225 if (clear_cache || !RestoreFromCacheFile()) { 319 if (clear_cache || !RestoreFromCacheFile()) {
226 base::TimeTicks beginning_time = base::TimeTicks::Now(); 320 base::TimeTicks beginning_time = base::TimeTicks::Now();
227 // The index has to be built from scratch. 321 // The index has to be built from scratch.
228 URLDatabase::URLEnumerator history_enum; 322 URLDatabase::URLEnumerator history_enum;
229 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 323 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
230 return false; 324 return false;
231 URLRow row; 325 URLRow row;
232 while (history_enum.GetNextURL(&row)) { 326 while (history_enum.GetNextURL(&row)) {
233 if (!IndexRow(row)) 327 if (!IndexRow(row))
234 return false; 328 return false;
235 } 329 }
236 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", 330 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
237 base::TimeTicks::Now() - beginning_time); 331 base::TimeTicks::Now() - beginning_time);
238 SaveToCacheFile(); 332 SaveToCacheFile();
239 } 333 }
240 return true; 334 return true;
241 } 335 }
242 336
243 void InMemoryURLIndex::ClearPrivateData() { 337 void InMemoryURLIndex::ClearPrivateData() {
244 history_item_count_ = 0; 338 private_data_->Clear();
245 word_list_.clear();
246 word_map_.clear();
247 char_word_map_.clear();
248 word_id_history_map_.clear();
249 history_info_map_.clear();
250 search_term_cache_.clear(); 339 search_term_cache_.clear();
251 } 340 }
252 341
253 bool InMemoryURLIndex::RestoreFromCacheFile() { 342 bool InMemoryURLIndex::RestoreFromCacheFile() {
254 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. 343 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
255 // That is: ensure that the database has not been modified since the cache 344 // That is: ensure that the database has not been modified since the cache
256 // was last saved. DB file modification date is inadequate. There are no 345 // was last saved. DB file modification date is inadequate. There are no
257 // SQLite table checksums automatically stored. 346 // SQLite table checksums automatically stored.
258 // FIXME(mrossetti): Move File IO to another thread. 347 // FIXME(mrossetti): Move File IO to another thread.
259 base::ThreadRestrictions::ScopedAllowIO allow_io; 348 base::ThreadRestrictions::ScopedAllowIO allow_io;
(...skipping 15 matching lines...) Expand all
275 return false; 364 return false;
276 } 365 }
277 366
278 if (!RestorePrivateData(index_cache)) { 367 if (!RestorePrivateData(index_cache)) {
279 ClearPrivateData(); // Back to square one -- must build from scratch. 368 ClearPrivateData(); // Back to square one -- must build from scratch.
280 return false; 369 return false;
281 } 370 }
282 371
283 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", 372 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
284 base::TimeTicks::Now() - beginning_time); 373 base::TimeTicks::Now() - beginning_time);
285 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); 374 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
375 private_data_->history_item_count_);
286 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); 376 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
287 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); 377 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
288 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); 378 private_data_->word_map_.size());
379 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
380 private_data_->char_word_map_.size());
289 return true; 381 return true;
290 } 382 }
291 383
292 bool InMemoryURLIndex::SaveToCacheFile() { 384 bool InMemoryURLIndex::SaveToCacheFile() {
293 // TODO(mrossetti): Move File IO to another thread. 385 // TODO(mrossetti): Move File IO to another thread.
294 base::ThreadRestrictions::ScopedAllowIO allow_io; 386 base::ThreadRestrictions::ScopedAllowIO allow_io;
295 base::TimeTicks beginning_time = base::TimeTicks::Now(); 387 base::TimeTicks beginning_time = base::TimeTicks::Now();
296 InMemoryURLIndexCacheItem index_cache; 388 InMemoryURLIndexCacheItem index_cache;
297 SavePrivateData(&index_cache); 389 SavePrivateData(&index_cache);
298 std::string data; 390 std::string data;
(...skipping 11 matching lines...) Expand all
310 int size = data.size(); 402 int size = data.size();
311 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { 403 if (file_util::WriteFile(file_path, data.c_str(), size) != size) {
312 LOG(WARNING) << "Failed to write " << file_path.value(); 404 LOG(WARNING) << "Failed to write " << file_path.value();
313 return false; 405 return false;
314 } 406 }
315 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", 407 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
316 base::TimeTicks::Now() - beginning_time); 408 base::TimeTicks::Now() - beginning_time);
317 return true; 409 return true;
318 } 410 }
319 411
320 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { 412 void InMemoryURLIndex::UpdateURL(const URLRow& row) {
321 // The row may or may not already be in our index. If it is not already 413 // The row may or may not already be in our index. If it is not already
322 // indexed and it qualifies then it gets indexed. If it is already 414 // indexed and it qualifies then it gets indexed. If it is already
323 // indexed and still qualifies then it gets updated, otherwise it 415 // indexed and still qualifies then it gets updated, otherwise it
324 // is deleted from the index. 416 // is deleted from the index.
325 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); 417 HistoryInfoMap::iterator row_pos =
326 if (row_pos == history_info_map_.end()) { 418 private_data_->history_info_map_.find(row.id());
419 if (row_pos == private_data_->history_info_map_.end()) {
327 // This new row should be indexed if it qualifies. 420 // This new row should be indexed if it qualifies.
328 if (RowQualifiesAsSignificant(row, base::Time())) 421 if (RowQualifiesAsSignificant(row, base::Time()))
329 IndexRow(row); 422 IndexRow(row);
330 } else if (RowQualifiesAsSignificant(row, base::Time())) { 423 } else if (RowQualifiesAsSignificant(row, base::Time())) {
331 // This indexed row still qualifies and will be re-indexed. 424 // This indexed row still qualifies and will be re-indexed.
332 // The url won't have changed but the title, visit count, etc. 425 // The url won't have changed but the title, visit count, etc.
333 // might have changed. 426 // might have changed.
334 URLRow& old_row = row_pos->second; 427 URLRow& old_row = row_pos->second;
335 old_row.set_visit_count(row.visit_count()); 428 old_row.set_visit_count(row.visit_count());
336 old_row.set_typed_count(row.typed_count()); 429 old_row.set_typed_count(row.typed_count());
337 old_row.set_last_visit(row.last_visit()); 430 old_row.set_last_visit(row.last_visit());
338 // TODO(mrossetti): When we start indexing the title the next line 431 // While the URL is guaranteed to remain stable, the title may have change.
Peter Kasting 2011/10/07 17:50:37 Nit: change -> changed
mrossetti 2011/10/07 22:24:37 Done.
339 // will need attention. 432 // If so, then we need to update the index with the changed words.
340 old_row.set_title(row.title()); 433 if (old_row.title() != row.title()) {
434 // Clear all words associated with this row and re-index both the
435 // URL and title.
436 RemoveRowWordsFromIndex(row);
437 old_row.set_title(row.title());
438 AddRowWordsToIndex(old_row);
439 }
341 } else { 440 } else {
342 // This indexed row no longer qualifies and will be de-indexed. 441 // This indexed row no longer qualifies and will be de-indexed by
343 history_info_map_.erase(row_id); 442 // clearing all words associated with this row.
443 RemoveRowFromIndex(row);
344 } 444 }
345 // This invalidates the cache. 445 // This invalidates the cache.
346 search_term_cache_.clear(); 446 search_term_cache_.clear();
347 // TODO(mrossetti): Record this transaction in the cache.
348 } 447 }
349 448
350 void InMemoryURLIndex::DeleteURL(URLID row_id) { 449 void InMemoryURLIndex::DeleteURL(const URLRow& row) {
351 // Note that this does not remove any reference to this row from the 450 RemoveRowFromIndex(row);
352 // word_id_history_map_. That map will continue to contain (and return)
353 // hits against this row until that map is rebuilt, but since the
354 // history_info_map_ no longer references the row no erroneous results
355 // will propagate to the user.
356 history_info_map_.erase(row_id);
357 // This invalidates the word cache. 451 // This invalidates the word cache.
358 search_term_cache_.clear(); 452 search_term_cache_.clear();
359 // TODO(mrossetti): Record this transaction in the cache. 453 // TODO(mrossetti): Record this transaction in the cache.
360 } 454 }
361 455
362 // Searching 456 // Searching
363 457
364 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( 458 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
365 const String16Vector& terms) { 459 const String16Vector& terms) {
366 ScoredHistoryMatches scored_items; 460 ScoredHistoryMatches scored_items;
461
462 // Do nothing if we have indexed no words (probably because we've not been
463 // initialized yet).
464 if (private_data_->word_list_.empty())
465 return scored_items;
466
367 if (!terms.empty()) { 467 if (!terms.empty()) {
368 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep 468 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
369 // approach. 469 // approach.
370 ResetSearchTermCache(); 470 ResetSearchTermCache();
371 471
372 // Lowercase the terms. 472 // Lowercase the terms.
373 // TODO(mrossetti): Another opportunity for a transform algorithm. 473 // TODO(mrossetti): Another opportunity for a transform algorithm.
374 String16Vector lower_terms; 474 String16Vector lower_terms;
375 for (String16Vector::const_iterator term_iter = terms.begin(); 475 for (String16Vector::const_iterator term_iter = terms.begin();
376 term_iter != terms.end(); ++term_iter) 476 term_iter != terms.end(); ++term_iter)
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
414 514
415 return scored_items; 515 return scored_items;
416 } 516 }
417 517
418 void InMemoryURLIndex::ResetSearchTermCache() { 518 void InMemoryURLIndex::ResetSearchTermCache() {
419 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 519 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();
420 iter != search_term_cache_.end(); ++iter) 520 iter != search_term_cache_.end(); ++iter)
421 iter->second.used_ = false; 521 iter->second.used_ = false;
422 } 522 }
423 523
424 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( 524 HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
425 const string16& uni_string) { 525 const string16& uni_string) {
426 // Break the terms down into individual terms (words), get the candidate 526 // Break the terms down into individual terms (words), get the candidate
427 // set for each term, and intersect each to get a final candidate list. 527 // set for each term, and intersect each to get a final candidate list.
428 // Note that a single 'term' from the user's perspective might be 528 // Note that a single 'term' from the user's perspective might be
429 // a string like "http://www.somewebsite.com" which, from our perspective, 529 // a string like "http://www.somewebsite.com" which, from our perspective,
430 // is four words: 'http', 'www', 'somewebsite', and 'com'. 530 // is four words: 'http', 'www', 'somewebsite', and 'com'.
431 HistoryIDSet history_id_set; 531 HistoryIDSet history_id_set;
432 String16Vector terms = WordVectorFromString16(uni_string, true); 532 String16Vector terms = String16VectorFromString16(uni_string, true);
433 // Sort the terms into the longest first as such are likely to narrow down 533 // Sort the terms into the longest first as such are likely to narrow down
434 // the results quicker. Also, single character terms are the most expensive 534 // the results quicker. Also, single character terms are the most expensive
435 // to process so save them for last. 535 // to process so save them for last.
436 std::sort(terms.begin(), terms.end(), LengthGreater); 536 std::sort(terms.begin(), terms.end(), LengthGreater);
437 for (String16Vector::iterator iter = terms.begin(); iter != terms.end(); 537 for (String16Vector::iterator iter = terms.begin(); iter != terms.end();
438 ++iter) { 538 ++iter) {
439 string16 uni_word = *iter; 539 string16 uni_word = *iter;
440 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); 540 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);
441 if (term_history_set.empty()) { 541 if (term_history_set.empty()) {
442 history_id_set.clear(); 542 history_id_set.clear();
443 break; 543 break;
444 } 544 }
445 if (iter == terms.begin()) { 545 if (iter == terms.begin()) {
446 history_id_set.swap(term_history_set); 546 history_id_set.swap(term_history_set);
447 } else { 547 } else {
448 HistoryIDSet new_history_id_set; 548 HistoryIDSet new_history_id_set;
449 std::set_intersection(history_id_set.begin(), history_id_set.end(), 549 std::set_intersection(history_id_set.begin(), history_id_set.end(),
450 term_history_set.begin(), term_history_set.end(), 550 term_history_set.begin(), term_history_set.end(),
451 std::inserter(new_history_id_set, 551 std::inserter(new_history_id_set,
452 new_history_id_set.begin())); 552 new_history_id_set.begin()));
453 history_id_set.swap(new_history_id_set); 553 history_id_set.swap(new_history_id_set);
454 } 554 }
455 } 555 }
456 return history_id_set; 556 return history_id_set;
457 } 557 }
458 558
459 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( 559 HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
460 const string16& term) { 560 const string16& term) {
461 if (term.empty()) 561 if (term.empty())
462 return HistoryIDSet(); 562 return HistoryIDSet();
463 563
464 // TODO(mrossetti): Consider optimizing for very common terms such as 564 // TODO(mrossetti): Consider optimizing for very common terms such as
465 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently 565 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
466 // occuring words in the user's searches. 566 // occuring words in the user's searches.
467 567
468 size_t term_length = term.length(); 568 size_t term_length = term.length();
469 InMemoryURLIndex::WordIDSet word_id_set; 569 WordIDSet word_id_set;
470 if (term_length > 1) { 570 if (term_length > 1) {
471 // See if this term or a prefix thereof is present in the cache. 571 // See if this term or a prefix thereof is present in the cache.
472 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); 572 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());
473 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); 573 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
474 cache_iter != search_term_cache_.end(); ++cache_iter) { 574 cache_iter != search_term_cache_.end(); ++cache_iter) {
475 if (StartsWith(term, cache_iter->first, false) && 575 if (StartsWith(term, cache_iter->first, false) &&
476 (best_prefix == search_term_cache_.end() || 576 (best_prefix == search_term_cache_.end() ||
477 cache_iter->first.length() > best_prefix->first.length())) 577 cache_iter->first.length() > best_prefix->first.length()))
478 best_prefix = cache_iter; 578 best_prefix = cache_iter;
479 } 579 }
(...skipping 25 matching lines...) Expand all
505 605
506 // Filter for each remaining, unique character in the term. 606 // Filter for each remaining, unique character in the term.
507 Char16Set leftover_chars = Char16SetFromString16(leftovers); 607 Char16Set leftover_chars = Char16SetFromString16(leftovers);
508 Char16Set unique_chars; 608 Char16Set unique_chars;
509 std::set_difference(leftover_chars.begin(), leftover_chars.end(), 609 std::set_difference(leftover_chars.begin(), leftover_chars.end(),
510 prefix_chars.begin(), prefix_chars.end(), 610 prefix_chars.begin(), prefix_chars.end(),
511 std::inserter(unique_chars, unique_chars.begin())); 611 std::inserter(unique_chars, unique_chars.begin()));
512 612
513 // Reduce the word set with any leftover, unprocessed characters. 613 // Reduce the word set with any leftover, unprocessed characters.
514 if (!unique_chars.empty()) { 614 if (!unique_chars.empty()) {
515 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); 615 WordIDSet leftover_set(
616 private_data_->WordIDSetForTermChars(unique_chars));
516 // We might come up empty on the leftovers. 617 // We might come up empty on the leftovers.
517 if (leftover_set.empty()) { 618 if (leftover_set.empty()) {
518 search_term_cache_[term] = SearchTermCacheItem(); 619 search_term_cache_[term] = SearchTermCacheItem();
519 return HistoryIDSet(); 620 return HistoryIDSet();
520 } 621 }
521 // Or there may not have been a prefix from which to start. 622 // Or there may not have been a prefix from which to start.
522 if (prefix_chars.empty()) { 623 if (prefix_chars.empty()) {
523 word_id_set.swap(leftover_set); 624 word_id_set.swap(leftover_set);
524 } else { 625 } else {
525 WordIDSet new_word_id_set; 626 WordIDSet new_word_id_set;
526 std::set_intersection(word_id_set.begin(), word_id_set.end(), 627 std::set_intersection(word_id_set.begin(), word_id_set.end(),
527 leftover_set.begin(), leftover_set.end(), 628 leftover_set.begin(), leftover_set.end(),
528 std::inserter(new_word_id_set, 629 std::inserter(new_word_id_set,
529 new_word_id_set.begin())); 630 new_word_id_set.begin()));
530 word_id_set.swap(new_word_id_set); 631 word_id_set.swap(new_word_id_set);
531 } 632 }
532 } 633 }
533 634
534 // We must filter the word list because the resulting word set surely 635 // We must filter the word list because the resulting word set surely
535 // contains words which do not have the search term as a proper subset. 636 // contains words which do not have the search term as a proper subset.
536 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); 637 for (WordIDSet::iterator word_set_iter = word_id_set.begin();
537 word_set_iter != word_id_set.end(); ) { 638 word_set_iter != word_id_set.end(); ) {
538 if (word_list_[*word_set_iter].find(term) == string16::npos) 639 if (private_data_->word_list_[*word_set_iter].find(term) ==
640 string16::npos)
539 word_id_set.erase(word_set_iter++); 641 word_id_set.erase(word_set_iter++);
540 else 642 else
541 ++word_set_iter; 643 ++word_set_iter;
542 } 644 }
543 } else { 645 } else {
544 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); 646 word_id_set =
647 private_data_->WordIDSetForTermChars(Char16SetFromString16(term));
545 } 648 }
546 649
547 // If any words resulted then we can compose a set of history IDs by unioning 650 // If any words resulted then we can compose a set of history IDs by unioning
548 // the sets from each word. 651 // the sets from each word.
549 HistoryIDSet history_id_set; 652 HistoryIDSet history_id_set;
550 if (!word_id_set.empty()) { 653 if (!word_id_set.empty()) {
551 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 654 for (WordIDSet::iterator word_id_iter = word_id_set.begin();
552 word_id_iter != word_id_set.end(); ++word_id_iter) { 655 word_id_iter != word_id_set.end(); ++word_id_iter) {
553 WordID word_id = *word_id_iter; 656 WordID word_id = *word_id_iter;
554 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); 657 WordIDHistoryMap::iterator word_iter =
555 if (word_iter != word_id_history_map_.end()) { 658 private_data_->word_id_history_map_.find(word_id);
659 if (word_iter != private_data_->word_id_history_map_.end()) {
556 HistoryIDSet& word_history_id_set(word_iter->second); 660 HistoryIDSet& word_history_id_set(word_iter->second);
557 history_id_set.insert(word_history_id_set.begin(), 661 history_id_set.insert(word_history_id_set.begin(),
558 word_history_id_set.end()); 662 word_history_id_set.end());
559 } 663 }
560 } 664 }
561 } 665 }
562 666
563 // Record a new cache entry for this word if the term is longer than 667 // Record a new cache entry for this word if the term is longer than
564 // a single character. 668 // a single character.
565 if (term_length > 1) 669 if (term_length > 1)
566 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); 670 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
567 671
568 return history_id_set; 672 return history_id_set;
569 } 673 }
570 674
571 // Utility Functions 675 // Utility Functions
572 676
573 // static
574 InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
575 const string16& uni_string) {
576 const size_t kMaxWordLength = 64;
577 String16Vector words = WordVectorFromString16(uni_string, false);
578 String16Set word_set;
579 for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
580 ++iter)
581 word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));
582 return word_set;
583 }
584
585 // static
586 InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
587 const string16& uni_string,
588 bool break_on_space) {
589 base::i18n::BreakIterator iter(
590 uni_string,
591 break_on_space ? base::i18n::BreakIterator::BREAK_SPACE
592 : base::i18n::BreakIterator::BREAK_WORD);
593 String16Vector words;
594 if (!iter.Init())
595 return words;
596 while (iter.Advance()) {
597 if (break_on_space || iter.IsWord()) {
598 string16 word = iter.GetString();
599 if (break_on_space)
600 TrimWhitespace(word, TRIM_ALL, &word);
601 if (!word.empty())
602 words.push_back(word);
603 }
604 }
605 return words;
606 }
607
608 // static
609 InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
610 const string16& term) {
611 Char16Set characters;
612 for (string16::const_iterator iter = term.begin(); iter != term.end();
613 ++iter)
614 characters.insert(*iter);
615 return characters;
616 }
617
618 void InMemoryURLIndex::AddWordToIndex(const string16& term, 677 void InMemoryURLIndex::AddWordToIndex(const string16& term,
619 HistoryID history_id) { 678 HistoryID history_id) {
620 WordMap::iterator word_pos = word_map_.find(term); 679 WordMap::iterator word_pos = private_data_->word_map_.find(term);
621 if (word_pos != word_map_.end()) 680 if (word_pos != private_data_->word_map_.end())
622 UpdateWordHistory(word_pos->second, history_id); 681 UpdateWordHistory(word_pos->second, history_id);
623 else 682 else
624 AddWordHistory(term, history_id); 683 AddWordHistory(term, history_id);
625 } 684 }
626 685
627 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { 686 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
628 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); 687 WordIDHistoryMap::iterator history_pos =
629 DCHECK(history_pos != word_id_history_map_.end()); 688 private_data_->word_id_history_map_.find(word_id);
630 HistoryIDSet& history_id_set(history_pos->second); 689 DCHECK(history_pos != private_data_->word_id_history_map_.end());
631 history_id_set.insert(history_id); 690 HistoryIDSet& history_id_set(history_pos->second);
691 history_id_set.insert(history_id);
692 private_data_->AddToHistoryIDWordMap(history_id, word_id);
632 } 693 }
633 694
634 // Add a new word to the word list and the word map, and then create a 695 // Add a new word to the word list and the word map, and then create a
635 // new entry in the word/history map. 696 // new entry in the word/history map.
636 void InMemoryURLIndex::AddWordHistory(const string16& term, 697 void InMemoryURLIndex::AddWordHistory(const string16& term,
637 HistoryID history_id) { 698 HistoryID history_id) {
638 word_list_.push_back(term); 699 URLIndexPrivateData& private_data(*(private_data_.get()));
639 WordID word_id = word_list_.size() - 1; 700 WordID word_id = private_data.word_list_.size();
640 word_map_[term] = word_id; 701 if (private_data.available_words_.empty()) {
702 private_data.word_list_.push_back(term);
703 } else {
704 word_id = *(private_data.available_words_.begin());
705 private_data.word_list_[word_id] = term;
706 private_data.available_words_.erase(word_id);
707 }
708 private_data.word_map_[term] = word_id;
709
641 HistoryIDSet history_id_set; 710 HistoryIDSet history_id_set;
642 history_id_set.insert(history_id); 711 history_id_set.insert(history_id);
643 word_id_history_map_[word_id] = history_id_set; 712 private_data.word_id_history_map_[word_id] = history_id_set;
713 private_data_->AddToHistoryIDWordMap(history_id, word_id);
714
644 // For each character in the newly added word (i.e. a word that is not 715 // For each character in the newly added word (i.e. a word that is not
645 // already in the word index), add the word to the character index. 716 // already in the word index), add the word to the character index.
646 Char16Set characters = Char16SetFromString16(term); 717 Char16Set characters = Char16SetFromString16(term);
647 for (Char16Set::iterator uni_char_iter = characters.begin(); 718 for (Char16Set::iterator uni_char_iter = characters.begin();
648 uni_char_iter != characters.end(); ++uni_char_iter) { 719 uni_char_iter != characters.end(); ++uni_char_iter) {
649 char16 uni_char = *uni_char_iter; 720 char16 uni_char = *uni_char_iter;
650 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); 721 CharWordIDMap::iterator char_iter =
651 if (char_iter != char_word_map_.end()) { 722 private_data.char_word_map_.find(uni_char);
723 if (char_iter != private_data.char_word_map_.end()) {
652 // Update existing entry in the char/word index. 724 // Update existing entry in the char/word index.
653 WordIDSet& word_id_set(char_iter->second); 725 WordIDSet& word_id_set(char_iter->second);
654 word_id_set.insert(word_id); 726 word_id_set.insert(word_id);
655 } else { 727 } else {
656 // Create a new entry in the char/word index. 728 // Create a new entry in the char/word index.
657 WordIDSet word_id_set; 729 WordIDSet word_id_set;
658 word_id_set.insert(word_id); 730 word_id_set.insert(word_id);
659 char_word_map_[uni_char] = word_id_set; 731 private_data.char_word_map_[uni_char] = word_id_set;
660 } 732 }
661 } 733 }
662 } 734 }
663 735
664 InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
665 const Char16Set& term_chars) {
666 WordIDSet word_id_set;
667 for (Char16Set::const_iterator c_iter = term_chars.begin();
668 c_iter != term_chars.end(); ++c_iter) {
669 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
670 if (char_iter == char_word_map_.end()) {
671 // A character was not found so there are no matching results: bail.
672 word_id_set.clear();
673 break;
674 }
675 WordIDSet& char_word_id_set(char_iter->second);
676 // It is possible for there to no longer be any words associated with
677 // a particular character. Give up in that case.
678 if (char_word_id_set.empty()) {
679 word_id_set.clear();
680 break;
681 }
682
683 if (c_iter == term_chars.begin()) {
684 // First character results becomes base set of results.
685 word_id_set = char_word_id_set;
686 } else {
687 // Subsequent character results get intersected in.
688 WordIDSet new_word_id_set;
689 std::set_intersection(word_id_set.begin(), word_id_set.end(),
690 char_word_id_set.begin(), char_word_id_set.end(),
691 std::inserter(new_word_id_set,
692 new_word_id_set.begin()));
693 word_id_set.swap(new_word_id_set);
694 }
695 }
696 return word_id_set;
697 }
698
699 // static 736 // static
700 TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, 737 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.
701 const string16& string,
702 int term_num) {
703 const size_t kMaxCompareLength = 2048;
704 const string16& short_string = (string.length() > kMaxCompareLength) ?
705 string.substr(0, kMaxCompareLength) : string;
706 TermMatches matches;
707 for (size_t location = short_string.find(term); location != string16::npos;
708 location = short_string.find(term, location + 1)) {
709 matches.push_back(TermMatch(term_num, location, term.length()));
710 }
711 return matches;
712 }
713
714 // static
715 TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
716 if (matches.empty())
717 return matches;
718 TermMatches sorted_matches = matches;
719 std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
720 TermMatches clean_matches;
721 TermMatch last_match = sorted_matches[0];
722 clean_matches.push_back(last_match);
723 for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;
724 iter != sorted_matches.end(); ++iter) {
725 if (iter->offset >= last_match.offset + last_match.length) {
726 last_match = *iter;
727 clean_matches.push_back(last_match);
728 }
729 }
730 return clean_matches;
731 }
732
733 // static
734 std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
735 const TermMatches& matches) {
736 std::vector<size_t> offsets;
737 for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
738 offsets.push_back(i->offset);
739 return offsets;
740 }
741
742 // static
743 TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
744 const TermMatches& matches,
745 const std::vector<size_t>& offsets) {
746 DCHECK_EQ(matches.size(), offsets.size());
747 TermMatches new_matches;
748 std::vector<size_t>::const_iterator offset_iter = offsets.begin();
749 for (TermMatches::const_iterator term_iter = matches.begin();
750 term_iter != matches.end(); ++term_iter, ++offset_iter) {
751 if (*offset_iter != string16::npos) {
752 TermMatch new_match(*term_iter);
753 new_match.offset = *offset_iter;
754 new_matches.push_back(new_match);
755 }
756 }
757 return new_matches;
758 }
759
760 // static
761 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( 738 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
762 const URLRow& row, 739 const URLRow& row,
763 const String16Vector& terms) { 740 const String16Vector& terms) {
764 ScoredHistoryMatch match(row); 741 ScoredHistoryMatch match(row);
765 GURL gurl = row.url(); 742 GURL gurl = row.url();
766 if (!gurl.is_valid()) 743 if (!gurl.is_valid())
767 return match; 744 return match;
768 745
769 // Figure out where each search term appears in the URL and/or page title 746 // Figure out where each search term appears in the URL and/or page title
770 // so that we can score as well as provide autocomplete highlighting. 747 // so that we can score as well as provide autocomplete highlighting.
771 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); 748 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec()));
772 string16 title = base::i18n::ToLower(row.title()); 749 string16 title = base::i18n::ToLower(row.title());
773 int term_num = 0; 750 int term_num = 0;
774 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); 751 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();
775 ++iter, ++term_num) { 752 ++iter, ++term_num) {
776 string16 term = *iter; 753 string16 term = *iter;
777 TermMatches url_term_matches = MatchTermInString(term, url, term_num); 754 TermMatches url_term_matches = MatchTermInString(term, url, term_num);
778 TermMatches title_term_matches = MatchTermInString(term, title, term_num); 755 TermMatches title_term_matches = MatchTermInString(term, title, term_num);
779 if (url_term_matches.empty() && title_term_matches.empty()) 756 if (url_term_matches.empty() && title_term_matches.empty())
780 return match; // A term was not found in either URL or title - reject. 757 return match; // A term was not found in either URL or title - reject.
781 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), 758 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(),
782 url_term_matches.end()); 759 url_term_matches.end());
783 match.title_matches.insert(match.title_matches.end(), 760 match.title_matches.insert(match.title_matches.end(),
784 title_term_matches.begin(), 761 title_term_matches.begin(),
785 title_term_matches.end()); 762 title_term_matches.end());
786 } 763 }
787 764
788 // Sort matches by offset and eliminate any which overlap. 765 // Sort matches by offset and eliminate any which overlap.
789 match.url_matches = SortAndDeoverlap(match.url_matches); 766 match.url_matches = SortAndDeoverlapMatches(match.url_matches);
790 match.title_matches = SortAndDeoverlap(match.title_matches); 767 match.title_matches = SortAndDeoverlapMatches(match.title_matches);
791 768
792 // We should not (currently) inline autocomplete a result unless both of the 769 // We should not (currently) inline autocomplete a result unless both of the
793 // following are true: 770 // following are true:
794 // * There is exactly one substring matches in the URL, and 771 // * There is exactly one substring matches in the URL, and
795 // * The one URL match starts at the beginning of the URL. 772 // * The one URL match starts at the beginning of the URL.
796 match.can_inline = 773 match.can_inline =
797 match.url_matches.size() == 1 && match.url_matches[0].offset == 0; 774 match.url_matches.size() == 1 && match.url_matches[0].offset == 0;
798 775
799 // Get partial scores based on term matching. Note that the score for 776 // Get partial scores based on term matching. Note that the score for
800 // each of the URL and title are adjusted by the fraction of the 777 // each of the URL and title are adjusted by the fraction of the
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
832 int factor[] = {term_score, days_ago_value, visit_count_value, 809 int factor[] = {term_score, days_ago_value, visit_count_value,
833 visit_count_value, typed_count_value, typed_count_value, 810 visit_count_value, typed_count_value, typed_count_value,
834 typed_count_value}; 811 typed_count_value};
835 const int kInsignificantFactors = 2; 812 const int kInsignificantFactors = 2;
836 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors; 813 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors;
837 std::partial_sort(factor, factor + kSignificantFactors, 814 std::partial_sort(factor, factor + kSignificantFactors,
838 factor + arraysize(factor), std::greater<int>()); 815 factor + arraysize(factor), std::greater<int>());
839 for (int i = 0; i < kSignificantFactors; ++i) 816 for (int i = 0; i < kSignificantFactors; ++i)
840 match.raw_score += factor[i]; 817 match.raw_score += factor[i];
841 match.raw_score /= kSignificantFactors; 818 match.raw_score /= kSignificantFactors;
842
843 return match; 819 return match;
844 } 820 }
845 821
846 int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches, 822 int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches,
847 size_t max_length) { 823 size_t max_length) {
848 // TODO(mrossetti): This is good enough for now but must be fine-tuned. 824 // TODO(mrossetti): This is good enough for now but must be fine-tuned.
849 if (matches.empty()) 825 if (matches.empty())
850 return 0; 826 return 0;
851 827
852 // Score component for whether the input terms (if more than one) were found 828 // Score component for whether the input terms (if more than one) were found
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
894 870
895 // Scale the sum of the three components above into a single score component 871 // Scale the sum of the three components above into a single score component
896 // on the same scale as that used in ScoredMatchForURL(). 872 // on the same scale as that used in ScoredMatchForURL().
897 return ScoreForValue(raw_score, kTermScoreLevel); 873 return ScoreForValue(raw_score, kTermScoreLevel);
898 } 874 }
899 875
900 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( 876 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
901 const InMemoryURLIndex& index, 877 const InMemoryURLIndex& index,
902 const String16Vector& lower_terms) 878 const String16Vector& lower_terms)
903 : index_(index), 879 : index_(index),
904 lower_terms_(lower_terms) { 880 lower_terms_(lower_terms) {}
905 }
906 881
907 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} 882 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
908 883
909 void InMemoryURLIndex::AddHistoryMatch::operator()( 884 void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) {
910 const InMemoryURLIndex::HistoryID history_id) {
911 HistoryInfoMap::const_iterator hist_pos = 885 HistoryInfoMap::const_iterator hist_pos =
912 index_.history_info_map_.find(history_id); 886 index_.private_data_->history_info_map_.find(history_id);
913 // Note that a history_id may be present in the word_id_history_map_ yet not 887 // Note that a history_id may be present in the word_id_history_map_ yet not
914 // be found in the history_info_map_. This occurs when an item has been 888 // be found in the history_info_map_. This occurs when an item has been
915 // deleted by the user or the item no longer qualifies as a quick result. 889 // deleted by the user or the item no longer qualifies as a quick result.
916 if (hist_pos != index_.history_info_map_.end()) { 890 if (hist_pos != index_.private_data_->history_info_map_.end()) {
917 const URLRow& hist_item = hist_pos->second; 891 const URLRow& hist_item = hist_pos->second;
918 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); 892 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));
919 if (match.raw_score > 0) 893 if (match.raw_score > 0)
920 scored_matches_.push_back(match); 894 scored_matches_.push_back(match);
921 } 895 }
922 } 896 }
923 897
924 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { 898 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) {
925 if (history_dir_.empty()) 899 if (history_dir_.empty())
926 return false; 900 return false;
927 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); 901 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));
928 return true; 902 return true;
929 } 903 }
930 904
931 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { 905 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const {
932 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); 906 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();
933 } 907 }
934 908
935 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { 909 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
936 DCHECK(cache); 910 DCHECK(cache);
937 cache->set_timestamp(base::Time::Now().ToInternalValue()); 911 cache->set_timestamp(base::Time::Now().ToInternalValue());
938 cache->set_history_item_count(history_item_count_); 912 cache->set_history_item_count(private_data_->history_item_count_);
939 SaveWordList(cache); 913 SaveWordList(cache);
940 SaveWordMap(cache); 914 SaveWordMap(cache);
941 SaveCharWordMap(cache); 915 SaveCharWordMap(cache);
942 SaveWordIDHistoryMap(cache); 916 SaveWordIDHistoryMap(cache);
943 SaveHistoryInfoMap(cache); 917 SaveHistoryInfoMap(cache);
944 } 918 }
945 919
946 bool InMemoryURLIndex::RestorePrivateData( 920 bool InMemoryURLIndex::RestorePrivateData(
947 const InMemoryURLIndexCacheItem& cache) { 921 const InMemoryURLIndexCacheItem& cache) {
948 last_saved_ = base::Time::FromInternalValue(cache.timestamp()); 922 last_saved_ = base::Time::FromInternalValue(cache.timestamp());
949 history_item_count_ = cache.history_item_count(); 923 private_data_->history_item_count_ = cache.history_item_count();
950 return (history_item_count_ == 0) || (RestoreWordList(cache) && 924 return (private_data_->history_item_count_ == 0) || (RestoreWordList(cache) &&
951 RestoreWordMap(cache) && RestoreCharWordMap(cache) && 925 RestoreWordMap(cache) && RestoreCharWordMap(cache) &&
952 RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); 926 RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));
953 } 927 }
954 928
955
956 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { 929 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
957 if (word_list_.empty()) 930 if (private_data_->word_list_.empty())
958 return; 931 return;
959 WordListItem* list_item = cache->mutable_word_list(); 932 WordListItem* list_item = cache->mutable_word_list();
960 list_item->set_word_count(word_list_.size()); 933 list_item->set_word_count(private_data_->word_list_.size());
961 for (String16Vector::const_iterator iter = word_list_.begin(); 934 for (String16Vector::const_iterator iter = private_data_->word_list_.begin();
962 iter != word_list_.end(); ++iter) 935 iter != private_data_->word_list_.end(); ++iter)
963 list_item->add_word(UTF16ToUTF8(*iter)); 936 list_item->add_word(UTF16ToUTF8(*iter));
964 } 937 }
965 938
966 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { 939 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {
967 if (!cache.has_word_list()) 940 if (!cache.has_word_list())
968 return false; 941 return false;
969 const WordListItem& list_item(cache.word_list()); 942 const WordListItem& list_item(cache.word_list());
970 uint32 expected_item_count = list_item.word_count(); 943 uint32 expected_item_count = list_item.word_count();
971 uint32 actual_item_count = list_item.word_size(); 944 uint32 actual_item_count = list_item.word_size();
972 if (actual_item_count == 0 || actual_item_count != expected_item_count) 945 if (actual_item_count == 0 || actual_item_count != expected_item_count)
973 return false; 946 return false;
974 const RepeatedPtrField<std::string>& words(list_item.word()); 947 const RepeatedPtrField<std::string>& words(list_item.word());
975 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); 948 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
976 iter != words.end(); ++iter) 949 iter != words.end(); ++iter)
977 word_list_.push_back(UTF8ToUTF16(*iter)); 950 private_data_->word_list_.push_back(UTF8ToUTF16(*iter));
978 return true; 951 return true;
979 } 952 }
980 953
981 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { 954 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
982 if (word_map_.empty()) 955 if (private_data_->word_map_.empty())
983 return; 956 return;
984 WordMapItem* map_item = cache->mutable_word_map(); 957 WordMapItem* map_item = cache->mutable_word_map();
985 map_item->set_item_count(word_map_.size()); 958 map_item->set_item_count(private_data_->word_map_.size());
986 for (WordMap::const_iterator iter = word_map_.begin(); 959 for (WordMap::const_iterator iter = private_data_->word_map_.begin();
987 iter != word_map_.end(); ++iter) { 960 iter != private_data_->word_map_.end(); ++iter) {
988 WordMapEntry* map_entry = map_item->add_word_map_entry(); 961 WordMapEntry* map_entry = map_item->add_word_map_entry();
989 map_entry->set_word(UTF16ToUTF8(iter->first)); 962 map_entry->set_word(UTF16ToUTF8(iter->first));
990 map_entry->set_word_id(iter->second); 963 map_entry->set_word_id(iter->second);
991 } 964 }
992 } 965 }
993 966
994 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { 967 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {
995 if (!cache.has_word_map()) 968 if (!cache.has_word_map())
996 return false; 969 return false;
997 const WordMapItem& list_item(cache.word_map()); 970 const WordMapItem& list_item(cache.word_map());
998 uint32 expected_item_count = list_item.item_count(); 971 uint32 expected_item_count = list_item.item_count();
999 uint32 actual_item_count = list_item.word_map_entry_size(); 972 uint32 actual_item_count = list_item.word_map_entry_size();
1000 if (actual_item_count == 0 || actual_item_count != expected_item_count) 973 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1001 return false; 974 return false;
1002 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); 975 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
1003 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); 976 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
1004 iter != entries.end(); ++iter) 977 iter != entries.end(); ++iter)
1005 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); 978 private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
1006 return true; 979 return true;
1007 } 980 }
1008 981
1009 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { 982 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {
1010 if (char_word_map_.empty()) 983 if (private_data_->char_word_map_.empty())
1011 return; 984 return;
1012 CharWordMapItem* map_item = cache->mutable_char_word_map(); 985 CharWordMapItem* map_item = cache->mutable_char_word_map();
1013 map_item->set_item_count(char_word_map_.size()); 986 map_item->set_item_count(private_data_->char_word_map_.size());
1014 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); 987 for (CharWordIDMap::const_iterator iter =
1015 iter != char_word_map_.end(); ++iter) { 988 private_data_->char_word_map_.begin();
989 iter != private_data_->char_word_map_.end(); ++iter) {
1016 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); 990 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
1017 map_entry->set_char_16(iter->first); 991 map_entry->set_char_16(iter->first);
1018 const WordIDSet& word_id_set(iter->second); 992 const WordIDSet& word_id_set(iter->second);
1019 map_entry->set_item_count(word_id_set.size()); 993 map_entry->set_item_count(word_id_set.size());
1020 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); 994 for (WordIDSet::const_iterator set_iter = word_id_set.begin();
1021 set_iter != word_id_set.end(); ++set_iter) 995 set_iter != word_id_set.end(); ++set_iter)
1022 map_entry->add_word_id(*set_iter); 996 map_entry->add_word_id(*set_iter);
1023 } 997 }
1024 } 998 }
1025 999
(...skipping 13 matching lines...) Expand all
1039 expected_item_count = iter->item_count(); 1013 expected_item_count = iter->item_count();
1040 actual_item_count = iter->word_id_size(); 1014 actual_item_count = iter->word_id_size();
1041 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1015 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1042 return false; 1016 return false;
1043 char16 uni_char = static_cast<char16>(iter->char_16()); 1017 char16 uni_char = static_cast<char16>(iter->char_16());
1044 WordIDSet word_id_set; 1018 WordIDSet word_id_set;
1045 const RepeatedField<int32>& word_ids(iter->word_id()); 1019 const RepeatedField<int32>& word_ids(iter->word_id());
1046 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); 1020 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
1047 jiter != word_ids.end(); ++jiter) 1021 jiter != word_ids.end(); ++jiter)
1048 word_id_set.insert(*jiter); 1022 word_id_set.insert(*jiter);
1049 char_word_map_[uni_char] = word_id_set; 1023 private_data_->char_word_map_[uni_char] = word_id_set;
1050 } 1024 }
1051 return true; 1025 return true;
1052 } 1026 }
1053 1027
1054 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) 1028 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)
1055 const { 1029 const {
1056 if (word_id_history_map_.empty()) 1030 if (private_data_->word_id_history_map_.empty())
1057 return; 1031 return;
1058 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); 1032 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
1059 map_item->set_item_count(word_id_history_map_.size()); 1033 map_item->set_item_count(private_data_->word_id_history_map_.size());
1060 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); 1034 for (WordIDHistoryMap::const_iterator iter =
1061 iter != word_id_history_map_.end(); ++iter) { 1035 private_data_->word_id_history_map_.begin();
1036 iter != private_data_->word_id_history_map_.end(); ++iter) {
1062 WordIDHistoryMapEntry* map_entry = 1037 WordIDHistoryMapEntry* map_entry =
1063 map_item->add_word_id_history_map_entry(); 1038 map_item->add_word_id_history_map_entry();
1064 map_entry->set_word_id(iter->first); 1039 map_entry->set_word_id(iter->first);
1065 const HistoryIDSet& history_id_set(iter->second); 1040 const HistoryIDSet& history_id_set(iter->second);
1066 map_entry->set_item_count(history_id_set.size()); 1041 map_entry->set_item_count(history_id_set.size());
1067 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); 1042 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
1068 set_iter != history_id_set.end(); ++set_iter) 1043 set_iter != history_id_set.end(); ++set_iter)
1069 map_entry->add_history_id(*set_iter); 1044 map_entry->add_history_id(*set_iter);
1070 } 1045 }
1071 } 1046 }
(...skipping 12 matching lines...) Expand all
1084 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = 1059 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
1085 entries.begin(); iter != entries.end(); ++iter) { 1060 entries.begin(); iter != entries.end(); ++iter) {
1086 expected_item_count = iter->item_count(); 1061 expected_item_count = iter->item_count();
1087 actual_item_count = iter->history_id_size(); 1062 actual_item_count = iter->history_id_size();
1088 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1063 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1089 return false; 1064 return false;
1090 WordID word_id = iter->word_id(); 1065 WordID word_id = iter->word_id();
1091 HistoryIDSet history_id_set; 1066 HistoryIDSet history_id_set;
1092 const RepeatedField<int64>& history_ids(iter->history_id()); 1067 const RepeatedField<int64>& history_ids(iter->history_id());
1093 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); 1068 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
1094 jiter != history_ids.end(); ++jiter) 1069 jiter != history_ids.end(); ++jiter) {
1095 history_id_set.insert(*jiter); 1070 history_id_set.insert(*jiter);
1096 word_id_history_map_[word_id] = history_id_set; 1071 private_data_->AddToHistoryIDWordMap(*jiter, word_id);
1072 }
1073 private_data_->word_id_history_map_[word_id] = history_id_set;
1097 } 1074 }
1098 return true; 1075 return true;
1099 } 1076 }
1100 1077
1101 void InMemoryURLIndex::SaveHistoryInfoMap( 1078 void InMemoryURLIndex::SaveHistoryInfoMap(
1102 InMemoryURLIndexCacheItem* cache) const { 1079 InMemoryURLIndexCacheItem* cache) const {
1103 if (history_info_map_.empty()) 1080 if (private_data_->history_info_map_.empty())
1104 return; 1081 return;
1105 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 1082 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1106 map_item->set_item_count(history_info_map_.size()); 1083 map_item->set_item_count(private_data_->history_info_map_.size());
1107 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1084 for (HistoryInfoMap::const_iterator iter =
1108 iter != history_info_map_.end(); ++iter) { 1085 private_data_->history_info_map_.begin();
1086 iter != private_data_->history_info_map_.end(); ++iter) {
1109 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 1087 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1110 map_entry->set_history_id(iter->first); 1088 map_entry->set_history_id(iter->first);
1111 const URLRow& url_row(iter->second); 1089 const URLRow& url_row(iter->second);
1112 // Note: We only save information that contributes to the index so there 1090 // Note: We only save information that contributes to the index so there
1113 // is no need to save search_term_cache_ (not persistent), 1091 // is no need to save search_term_cache_ (not persistent),
1114 // languages_, etc. 1092 // languages_, etc.
1115 map_entry->set_visit_count(url_row.visit_count()); 1093 map_entry->set_visit_count(url_row.visit_count());
1116 map_entry->set_typed_count(url_row.typed_count()); 1094 map_entry->set_typed_count(url_row.typed_count());
1117 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 1095 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1118 map_entry->set_url(url_row.url().spec()); 1096 map_entry->set_url(url_row.url().spec());
(...skipping 17 matching lines...) Expand all
1136 HistoryID history_id = iter->history_id(); 1114 HistoryID history_id = iter->history_id();
1137 GURL url(iter->url()); 1115 GURL url(iter->url());
1138 URLRow url_row(url, history_id); 1116 URLRow url_row(url, history_id);
1139 url_row.set_visit_count(iter->visit_count()); 1117 url_row.set_visit_count(iter->visit_count());
1140 url_row.set_typed_count(iter->typed_count()); 1118 url_row.set_typed_count(iter->typed_count());
1141 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); 1119 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
1142 if (iter->has_title()) { 1120 if (iter->has_title()) {
1143 string16 title(UTF8ToUTF16(iter->title())); 1121 string16 title(UTF8ToUTF16(iter->title()));
1144 url_row.set_title(title); 1122 url_row.set_title(title);
1145 } 1123 }
1146 history_info_map_[history_id] = url_row; 1124 private_data_->history_info_map_[history_id] = url_row;
1147 } 1125 }
1148 return true; 1126 return true;
1149 } 1127 }
1150 1128
1151 } // namespace history 1129 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698