OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/in_memory_url_index.h" | 5 #include "chrome/browser/history/in_memory_url_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
11 #include <numeric> | 11 #include <numeric> |
12 | 12 |
13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
14 #include "base/i18n/break_iterator.h" | |
15 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
16 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
17 #include "base/string_util.h" | 16 #include "base/string_util.h" |
18 #include "base/threading/thread_restrictions.h" | 17 #include "base/threading/thread_restrictions.h" |
19 #include "base/time.h" | 18 #include "base/time.h" |
20 #include "base/utf_string_conversions.h" | 19 #include "base/utf_string_conversions.h" |
21 #include "chrome/browser/autocomplete/autocomplete.h" | 20 #include "chrome/browser/autocomplete/autocomplete.h" |
22 #include "chrome/browser/autocomplete/history_provider_util.h" | 21 #include "chrome/browser/autocomplete/history_provider_util.h" |
23 #include "chrome/browser/history/url_database.h" | 22 #include "chrome/browser/history/url_database.h" |
24 #include "chrome/browser/profiles/profile.h" | 23 #include "chrome/browser/profiles/profile.h" |
(...skipping 27 matching lines...) Expand all Loading... |
52 HistoryInfoMapEntry; | 51 HistoryInfoMapEntry; |
53 | 52 |
54 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; | 53 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; |
55 | 54 |
56 // Score ranges used to get a 'base' score for each of the scoring factors | 55 // Score ranges used to get a 'base' score for each of the scoring factors |
57 // (such as recency of last visit, times visited, times the URL was typed, | 56 // (such as recency of last visit, times visited, times the URL was typed, |
58 // and the quality of the string match). There is a matching value range for | 57 // and the quality of the string match). There is a matching value range for |
59 // each of these scores for each factor. | 58 // each of these scores for each factor. |
60 const int kScoreRank[] = { 1425, 1200, 900, 400 }; | 59 const int kScoreRank[] = { 1425, 1200, 900, 400 }; |
61 | 60 |
62 ScoredHistoryMatch::ScoredHistoryMatch() | |
63 : raw_score(0), | |
64 can_inline(false) {} | |
65 | |
66 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) | |
67 : HistoryMatch(url_info, 0, false, false), | |
68 raw_score(0), | |
69 can_inline(false) {} | |
70 | |
71 ScoredHistoryMatch::~ScoredHistoryMatch() {} | |
72 | |
73 // Comparison function for sorting ScoredMatches by their scores. | |
74 bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, | |
75 const ScoredHistoryMatch& m2) { | |
76 return m1.raw_score >= m2.raw_score; | |
77 } | |
78 | |
79 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( | 61 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( |
80 const WordIDSet& word_id_set, | 62 const WordIDSet& word_id_set, |
81 const HistoryIDSet& history_id_set) | 63 const HistoryIDSet& history_id_set) |
82 : word_id_set_(word_id_set), | 64 : word_id_set_(word_id_set), |
83 history_id_set_(history_id_set), | 65 history_id_set_(history_id_set), |
84 used_(true) {} | 66 used_(true) {} |
85 | 67 |
86 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() | 68 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() |
87 : used_(true) {} | 69 : used_(true) {} |
88 | 70 |
89 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} | 71 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} |
90 | 72 |
91 // Comparison function for sorting TermMatches by their offsets. | |
92 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { | |
93 return m1.offset < m2.offset; | |
94 } | |
95 | |
96 // Comparison function for sorting search terms by descending length. | 73 // Comparison function for sorting search terms by descending length. |
97 bool LengthGreater(const string16& string_a, const string16& string_b) { | 74 bool LengthGreater(const string16& string_a, const string16& string_b) { |
98 return string_a.length() > string_b.length(); | 75 return string_a.length() > string_b.length(); |
99 } | 76 } |
100 | 77 |
101 // std::accumulate helper function to add up TermMatches' lengths. | 78 // std::accumulate helper function to add up TermMatches' lengths. |
102 int AccumulateMatchLength(int total, const TermMatch& match) { | 79 int AccumulateMatchLength(int total, const TermMatch& match) { |
103 return total + match.length; | 80 return total + match.length; |
104 } | 81 } |
105 | 82 |
(...skipping 24 matching lines...) Expand all Loading... |
130 if (i > 0) { | 107 if (i > 0) { |
131 score += (value - value_ranks[i]) * | 108 score += (value - value_ranks[i]) * |
132 (kScoreRank[i - 1] - kScoreRank[i]) / | 109 (kScoreRank[i - 1] - kScoreRank[i]) / |
133 (value_ranks[i - 1] - value_ranks[i]); | 110 (value_ranks[i - 1] - value_ranks[i]); |
134 } | 111 } |
135 return score; | 112 return score; |
136 } | 113 } |
137 | 114 |
138 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) | 115 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) |
139 : history_dir_(history_dir), | 116 : history_dir_(history_dir), |
140 history_item_count_(0), | 117 private_data_(new URLIndexPrivateData), |
141 cached_at_shutdown_(false) { | 118 cached_at_shutdown_(false) { |
142 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 119 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
143 } | 120 } |
144 | 121 |
145 // Called only by unit tests. | 122 // Called only by unit tests. |
146 InMemoryURLIndex::InMemoryURLIndex() | 123 InMemoryURLIndex::InMemoryURLIndex() |
147 : history_item_count_(0), | 124 : private_data_(new URLIndexPrivateData), |
148 cached_at_shutdown_(false) { | 125 cached_at_shutdown_(false) { |
149 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 126 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
150 } | 127 } |
151 | 128 |
152 InMemoryURLIndex::~InMemoryURLIndex() { | 129 InMemoryURLIndex::~InMemoryURLIndex() { |
153 // If there was a history directory (which there won't be for some unit tests) | 130 // If there was a history directory (which there won't be for some unit tests) |
154 // then ensure that the cache has already been saved. | 131 // then ensure that the cache has already been saved. |
155 DCHECK(history_dir_.empty() || cached_at_shutdown_); | 132 DCHECK(history_dir_.empty() || cached_at_shutdown_); |
156 } | 133 } |
157 | 134 |
158 // static | 135 // static |
159 void InMemoryURLIndex::InitializeSchemeWhitelist( | 136 void InMemoryURLIndex::InitializeSchemeWhitelist( |
160 std::set<std::string>* whitelist) { | 137 std::set<std::string>* whitelist) { |
161 DCHECK(whitelist); | 138 DCHECK(whitelist); |
162 whitelist->insert(std::string(chrome::kAboutScheme)); | 139 whitelist->insert(std::string(chrome::kAboutScheme)); |
163 whitelist->insert(std::string(chrome::kChromeUIScheme)); | 140 whitelist->insert(std::string(chrome::kChromeUIScheme)); |
164 whitelist->insert(std::string(chrome::kFileScheme)); | 141 whitelist->insert(std::string(chrome::kFileScheme)); |
165 whitelist->insert(std::string(chrome::kFtpScheme)); | 142 whitelist->insert(std::string(chrome::kFtpScheme)); |
166 whitelist->insert(std::string(chrome::kHttpScheme)); | 143 whitelist->insert(std::string(chrome::kHttpScheme)); |
167 whitelist->insert(std::string(chrome::kHttpsScheme)); | 144 whitelist->insert(std::string(chrome::kHttpsScheme)); |
168 whitelist->insert(std::string(chrome::kMailToScheme)); | 145 whitelist->insert(std::string(chrome::kMailToScheme)); |
169 } | 146 } |
170 | 147 |
171 // Indexing | 148 // Indexing |
172 | 149 |
173 bool InMemoryURLIndex::Init(history::URLDatabase* history_db, | 150 bool InMemoryURLIndex::Init(URLDatabase* history_db, |
174 const std::string& languages) { | 151 const std::string& languages) { |
175 // TODO(mrossetti): Register for profile/language change notifications. | 152 // TODO(mrossetti): Register for profile/language change notifications. |
176 languages_ = languages; | 153 languages_ = languages; |
177 return ReloadFromHistory(history_db, false); | 154 return ReloadFromHistory(history_db, false); |
178 } | 155 } |
179 | 156 |
180 void InMemoryURLIndex::ShutDown() { | 157 void InMemoryURLIndex::ShutDown() { |
181 // Write our cache. | 158 // Write our cache. |
182 SaveToCacheFile(); | 159 SaveToCacheFile(); |
183 cached_at_shutdown_ = true; | 160 cached_at_shutdown_ = true; |
184 } | 161 } |
185 | 162 |
186 bool InMemoryURLIndex::IndexRow(const URLRow& row) { | 163 void InMemoryURLIndex::IndexRow(const URLRow& row) { |
187 const GURL& gurl(row.url()); | 164 const GURL& gurl(row.url()); |
188 | 165 |
189 // Index only URLs with a whitelisted scheme. | 166 // Index only URLs with a whitelisted scheme. |
190 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) | 167 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) |
191 return true; | 168 return; |
192 | 169 |
| 170 URLID row_id = row.id(); |
| 171 // Strip out username and password before saving and indexing. |
193 string16 url(net::FormatUrl(gurl, languages_, | 172 string16 url(net::FormatUrl(gurl, languages_, |
194 net::kFormatUrlOmitUsernamePassword, | 173 net::kFormatUrlOmitUsernamePassword, |
195 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, | 174 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
196 NULL, NULL, NULL)); | 175 NULL, NULL, NULL)); |
197 | 176 |
198 HistoryID history_id = static_cast<HistoryID>(row.id()); | 177 HistoryID history_id = static_cast<HistoryID>(row_id); |
199 DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); | 178 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
200 | 179 |
201 // Add the row for quick lookup in the history info store. | 180 // Add the row for quick lookup in the history info store. |
202 URLRow new_row(GURL(url), row.id()); | 181 URLRow new_row(GURL(url), row_id); |
203 new_row.set_visit_count(row.visit_count()); | 182 new_row.set_visit_count(row.visit_count()); |
204 new_row.set_typed_count(row.typed_count()); | 183 new_row.set_typed_count(row.typed_count()); |
205 new_row.set_last_visit(row.last_visit()); | 184 new_row.set_last_visit(row.last_visit()); |
206 new_row.set_title(row.title()); | 185 new_row.set_title(row.title()); |
207 history_info_map_[history_id] = new_row; | 186 private_data_->history_info_map_[history_id] = new_row; |
208 | 187 |
| 188 // Index the words contained in the URL and title of the row. |
| 189 AddRowWordsToIndex(new_row); |
| 190 return; |
| 191 } |
| 192 |
| 193 void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) { |
| 194 HistoryID history_id = static_cast<HistoryID>(row.id()); |
209 // Split URL into individual, unique words then add in the title words. | 195 // Split URL into individual, unique words then add in the title words. |
| 196 const GURL& gurl(row.url()); |
| 197 string16 url(net::FormatUrl(gurl, languages_, |
| 198 net::kFormatUrlOmitUsernamePassword, |
| 199 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
| 200 NULL, NULL, NULL)); |
210 url = base::i18n::ToLower(url); | 201 url = base::i18n::ToLower(url); |
211 String16Set url_words = WordSetFromString16(url); | 202 String16Set url_words = String16SetFromString16(url); |
212 String16Set title_words = WordSetFromString16(row.title()); | 203 String16Set title_words = String16SetFromString16(row.title()); |
213 String16Set words; | 204 String16Set words; |
214 std::set_union(url_words.begin(), url_words.end(), | 205 std::set_union(url_words.begin(), url_words.end(), |
215 title_words.begin(), title_words.end(), | 206 title_words.begin(), title_words.end(), |
216 std::insert_iterator<String16Set>(words, words.begin())); | 207 std::insert_iterator<String16Set>(words, words.begin())); |
217 for (String16Set::iterator word_iter = words.begin(); | 208 for (String16Set::iterator word_iter = words.begin(); |
218 word_iter != words.end(); ++word_iter) | 209 word_iter != words.end(); ++word_iter) |
219 AddWordToIndex(*word_iter, history_id); | 210 AddWordToIndex(*word_iter, history_id); |
220 | 211 |
221 ++history_item_count_; | 212 search_term_cache_.clear(); // Invalidate the term cache. |
222 return true; | 213 } |
| 214 |
| 215 void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) { |
| 216 RemoveRowWordsFromIndex(row); |
| 217 HistoryID history_id = static_cast<HistoryID>(row.id()); |
| 218 private_data_->history_info_map_.erase(history_id); |
| 219 } |
| 220 |
| 221 void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) { |
| 222 // Remove the entries in history_id_word_map_ and word_id_history_map_ for |
| 223 // this row. |
| 224 URLIndexPrivateData& private_data(*(private_data_.get())); |
| 225 HistoryID history_id = static_cast<HistoryID>(row.id()); |
| 226 WordIDSet word_id_set = private_data.history_id_word_map_[history_id]; |
| 227 private_data.history_id_word_map_.erase(history_id); |
| 228 |
| 229 // Reconcile any changes to word usage. |
| 230 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); |
| 231 word_id_iter != word_id_set.end(); ++word_id_iter) { |
| 232 WordID word_id = *word_id_iter; |
| 233 private_data.word_id_history_map_[word_id].erase(history_id); |
| 234 if (!private_data.word_id_history_map_[word_id].empty()) |
| 235 continue; // The word is still in use. |
| 236 |
| 237 // The word is no longer in use. Reconcile any changes to character usage. |
| 238 string16 word = private_data.word_list_[word_id]; |
| 239 Char16Set characters = Char16SetFromString16(word); |
| 240 for (Char16Set::iterator uni_char_iter = characters.begin(); |
| 241 uni_char_iter != characters.end(); ++uni_char_iter) { |
| 242 char16 uni_char = *uni_char_iter; |
| 243 private_data.char_word_map_[uni_char].erase(word_id); |
| 244 if (private_data.char_word_map_[uni_char].empty()) |
| 245 private_data.char_word_map_.erase(uni_char); // No longer in use. |
| 246 } |
| 247 |
| 248 // Complete the removal of references to the word. |
| 249 private_data.word_id_history_map_.erase(word_id); |
| 250 private_data.word_map_.erase(word); |
| 251 private_data.word_list_[word_id] = string16(); |
| 252 private_data.available_words_.insert(word_id); |
| 253 } |
223 } | 254 } |
224 | 255 |
225 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, | 256 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, |
226 bool clear_cache) { | 257 bool clear_cache) { |
227 ClearPrivateData(); | 258 ClearPrivateData(); |
228 | 259 |
229 if (!history_db) | 260 if (!history_db) |
230 return false; | 261 return false; |
231 | 262 |
232 if (clear_cache || !RestoreFromCacheFile()) { | 263 if (clear_cache || !RestoreFromCacheFile()) { |
233 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 264 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
234 // The index has to be built from scratch. | 265 // The index has to be built from scratch. |
235 URLDatabase::URLEnumerator history_enum; | 266 URLDatabase::URLEnumerator history_enum; |
236 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 267 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
237 return false; | 268 return false; |
238 URLRow row; | 269 URLRow row; |
239 while (history_enum.GetNextURL(&row)) { | 270 while (history_enum.GetNextURL(&row)) |
240 if (!IndexRow(row)) | 271 IndexRow(row); |
241 return false; | |
242 } | |
243 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 272 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
244 base::TimeTicks::Now() - beginning_time); | 273 base::TimeTicks::Now() - beginning_time); |
245 SaveToCacheFile(); | 274 SaveToCacheFile(); |
246 } | 275 } |
247 return true; | 276 return true; |
248 } | 277 } |
249 | 278 |
250 void InMemoryURLIndex::ClearPrivateData() { | 279 void InMemoryURLIndex::ClearPrivateData() { |
251 history_item_count_ = 0; | 280 private_data_->Clear(); |
252 word_list_.clear(); | |
253 word_map_.clear(); | |
254 char_word_map_.clear(); | |
255 word_id_history_map_.clear(); | |
256 history_info_map_.clear(); | |
257 search_term_cache_.clear(); | 281 search_term_cache_.clear(); |
258 } | 282 } |
259 | 283 |
260 bool InMemoryURLIndex::RestoreFromCacheFile() { | 284 bool InMemoryURLIndex::RestoreFromCacheFile() { |
261 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 285 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
262 // That is: ensure that the database has not been modified since the cache | 286 // That is: ensure that the database has not been modified since the cache |
263 // was last saved. DB file modification date is inadequate. There are no | 287 // was last saved. DB file modification date is inadequate. There are no |
264 // SQLite table checksums automatically stored. | 288 // SQLite table checksums automatically stored. |
265 // FIXME(mrossetti): Move File IO to another thread. | 289 // FIXME(mrossetti): Move File IO to another thread. |
266 base::ThreadRestrictions::ScopedAllowIO allow_io; | 290 base::ThreadRestrictions::ScopedAllowIO allow_io; |
(...skipping 15 matching lines...) Expand all Loading... |
282 return false; | 306 return false; |
283 } | 307 } |
284 | 308 |
285 if (!RestorePrivateData(index_cache)) { | 309 if (!RestorePrivateData(index_cache)) { |
286 ClearPrivateData(); // Back to square one -- must build from scratch. | 310 ClearPrivateData(); // Back to square one -- must build from scratch. |
287 return false; | 311 return false; |
288 } | 312 } |
289 | 313 |
290 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", | 314 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
291 base::TimeTicks::Now() - beginning_time); | 315 base::TimeTicks::Now() - beginning_time); |
292 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); | 316 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| 317 private_data_->history_id_word_map_.size()); |
293 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); | 318 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
294 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); | 319 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
295 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); | 320 private_data_->word_map_.size()); |
| 321 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| 322 private_data_->char_word_map_.size()); |
296 return true; | 323 return true; |
297 } | 324 } |
298 | 325 |
299 bool InMemoryURLIndex::SaveToCacheFile() { | 326 bool InMemoryURLIndex::SaveToCacheFile() { |
300 // TODO(mrossetti): Move File IO to another thread. | 327 // TODO(mrossetti): Move File IO to another thread. |
301 base::ThreadRestrictions::ScopedAllowIO allow_io; | 328 base::ThreadRestrictions::ScopedAllowIO allow_io; |
302 FilePath file_path; | 329 FilePath file_path; |
303 if (!GetCacheFilePath(&file_path)) | 330 if (!GetCacheFilePath(&file_path)) |
304 return false; | 331 return false; |
305 | 332 |
(...skipping 14 matching lines...) Expand all Loading... |
320 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", | 347 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", |
321 base::TimeTicks::Now() - beginning_time); | 348 base::TimeTicks::Now() - beginning_time); |
322 return true; | 349 return true; |
323 } | 350 } |
324 | 351 |
325 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { | 352 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { |
326 // The row may or may not already be in our index. If it is not already | 353 // The row may or may not already be in our index. If it is not already |
327 // indexed and it qualifies then it gets indexed. If it is already | 354 // indexed and it qualifies then it gets indexed. If it is already |
328 // indexed and still qualifies then it gets updated, otherwise it | 355 // indexed and still qualifies then it gets updated, otherwise it |
329 // is deleted from the index. | 356 // is deleted from the index. |
330 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); | 357 HistoryInfoMap::iterator row_pos = |
331 if (row_pos == history_info_map_.end()) { | 358 private_data_->history_info_map_.find(row_id); |
| 359 if (row_pos == private_data_->history_info_map_.end()) { |
332 // This new row should be indexed if it qualifies. | 360 // This new row should be indexed if it qualifies. |
333 if (RowQualifiesAsSignificant(row, base::Time())) | 361 URLRow new_row(row); |
334 IndexRow(row); | 362 new_row.set_id(row_id); |
| 363 if (RowQualifiesAsSignificant(new_row, base::Time())) |
| 364 IndexRow(new_row); |
335 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 365 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
336 // This indexed row still qualifies and will be re-indexed. | 366 // This indexed row still qualifies and will be re-indexed. |
337 // The url won't have changed but the title, visit count, etc. | 367 // The url won't have changed but the title, visit count, etc. |
338 // might have changed. | 368 // might have changed. |
339 URLRow& old_row = row_pos->second; | 369 URLRow& updated_row = row_pos->second; |
340 old_row.set_visit_count(row.visit_count()); | 370 updated_row.set_visit_count(row.visit_count()); |
341 old_row.set_typed_count(row.typed_count()); | 371 updated_row.set_typed_count(row.typed_count()); |
342 old_row.set_last_visit(row.last_visit()); | 372 updated_row.set_last_visit(row.last_visit()); |
343 // TODO(mrossetti): When we start indexing the title the next line | 373 // While the URL is guaranteed to remain stable, the title may have changed. |
344 // will need attention. | 374 // If so, then we need to update the index with the changed words. |
345 old_row.set_title(row.title()); | 375 if (updated_row.title() != row.title()) { |
| 376 // Clear all words associated with this row and re-index both the |
| 377 // URL and title. |
| 378 RemoveRowWordsFromIndex(updated_row); |
| 379 updated_row.set_title(row.title()); |
| 380 AddRowWordsToIndex(updated_row); |
| 381 } |
346 } else { | 382 } else { |
347 // This indexed row no longer qualifies and will be de-indexed. | 383 // This indexed row no longer qualifies and will be de-indexed by |
348 history_info_map_.erase(row_id); | 384 // clearing all words associated with this row. |
| 385 URLRow& removed_row = row_pos->second; |
| 386 RemoveRowFromIndex(removed_row); |
349 } | 387 } |
350 // This invalidates the cache. | 388 // This invalidates the cache. |
351 search_term_cache_.clear(); | 389 search_term_cache_.clear(); |
352 // TODO(mrossetti): Record this transaction in the cache. | |
353 } | 390 } |
354 | 391 |
355 void InMemoryURLIndex::DeleteURL(URLID row_id) { | 392 void InMemoryURLIndex::DeleteURL(URLID row_id) { |
356 // Note that this does not remove any reference to this row from the | 393 // Note that this does not remove any reference to this row from the |
357 // word_id_history_map_. That map will continue to contain (and return) | 394 // word_id_history_map_. That map will continue to contain (and return) |
358 // hits against this row until that map is rebuilt, but since the | 395 // hits against this row until that map is rebuilt, but since the |
359 // history_info_map_ no longer references the row no erroneous results | 396 // history_info_map_ no longer references the row no erroneous results |
360 // will propagate to the user. | 397 // will propagate to the user. |
361 history_info_map_.erase(row_id); | 398 private_data_->history_info_map_.erase(row_id); |
362 // This invalidates the word cache. | 399 // This invalidates the word cache. |
363 search_term_cache_.clear(); | 400 search_term_cache_.clear(); |
364 // TODO(mrossetti): Record this transaction in the cache. | |
365 } | 401 } |
366 | 402 |
367 // Searching | 403 // Searching |
368 | 404 |
369 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( | 405 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( |
370 const String16Vector& terms) { | 406 const String16Vector& terms) { |
371 ScoredHistoryMatches scored_items; | 407 ScoredHistoryMatches scored_items; |
| 408 |
| 409 // Do nothing if we have indexed no words (probably because we've not been |
| 410 // initialized yet). |
| 411 if (private_data_->word_list_.empty()) |
| 412 return scored_items; |
| 413 |
372 if (!terms.empty()) { | 414 if (!terms.empty()) { |
373 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep | 415 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
374 // approach. | 416 // approach. |
375 ResetSearchTermCache(); | 417 ResetSearchTermCache(); |
376 | 418 |
377 // Lowercase the terms. | 419 // Lowercase the terms. |
378 // TODO(mrossetti): Another opportunity for a transform algorithm. | 420 // TODO(mrossetti): Another opportunity for a transform algorithm. |
379 String16Vector lower_terms; | 421 String16Vector lower_terms; |
380 for (String16Vector::const_iterator term_iter = terms.begin(); | 422 for (String16Vector::const_iterator term_iter = terms.begin(); |
381 term_iter != terms.end(); ++term_iter) | 423 term_iter != terms.end(); ++term_iter) |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
419 | 461 |
420 return scored_items; | 462 return scored_items; |
421 } | 463 } |
422 | 464 |
423 void InMemoryURLIndex::ResetSearchTermCache() { | 465 void InMemoryURLIndex::ResetSearchTermCache() { |
424 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); | 466 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); |
425 iter != search_term_cache_.end(); ++iter) | 467 iter != search_term_cache_.end(); ++iter) |
426 iter->second.used_ = false; | 468 iter->second.used_ = false; |
427 } | 469 } |
428 | 470 |
429 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( | 471 HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( |
430 const string16& uni_string) { | 472 const string16& uni_string) { |
431 // Break the string down into individual terms (words), get the candidate | 473 // Break the string down into individual terms (words), get the candidate |
432 // set for each term, and intersect the sets to get a final candidate list. | 474 // set for each term, and intersect the sets to get a final candidate list. |
433 // Note that a single 'term' from the user's perspective might be | 475 // Note that a single 'term' from the user's perspective might be |
434 // a string like "http://www.somewebsite.com" which, from our perspective, | 476 // a string like "http://www.somewebsite.com" which, from our perspective, |
435 // is four words: 'http', 'www', 'somewebsite', and 'com'. | 477 // is four words: 'http', 'www', 'somewebsite', and 'com'. |
436 HistoryIDSet history_id_set; | 478 HistoryIDSet history_id_set; |
437 String16Vector terms = WordVectorFromString16(uni_string, true); | 479 String16Vector terms = String16VectorFromString16(uni_string, true); |
438 // Sort the terms longest-first, since longer terms are likely to narrow down | 480 // Sort the terms longest-first, since longer terms are likely to narrow down |
439 // the results more quickly. Also, single-character terms are the most expensive | 481 // the results more quickly. Also, single-character terms are the most expensive |
440 // to process, so save them for last. | 482 // to process, so save them for last. |
441 std::sort(terms.begin(), terms.end(), LengthGreater); | 483 std::sort(terms.begin(), terms.end(), LengthGreater); |
442 for (String16Vector::iterator iter = terms.begin(); iter != terms.end(); | 484 for (String16Vector::iterator iter = terms.begin(); iter != terms.end(); |
443 ++iter) { | 485 ++iter) { |
444 string16 uni_word = *iter; | 486 string16 uni_word = *iter; |
445 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); | 487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); |
446 if (term_history_set.empty()) { | 488 if (term_history_set.empty()) { |
447 history_id_set.clear(); | 489 history_id_set.clear(); |
448 break; | 490 break; |
449 } | 491 } |
450 if (iter == terms.begin()) { | 492 if (iter == terms.begin()) { |
451 history_id_set.swap(term_history_set); | 493 history_id_set.swap(term_history_set); |
452 } else { | 494 } else { |
453 HistoryIDSet new_history_id_set; | 495 HistoryIDSet new_history_id_set; |
454 std::set_intersection(history_id_set.begin(), history_id_set.end(), | 496 std::set_intersection(history_id_set.begin(), history_id_set.end(), |
455 term_history_set.begin(), term_history_set.end(), | 497 term_history_set.begin(), term_history_set.end(), |
456 std::inserter(new_history_id_set, | 498 std::inserter(new_history_id_set, |
457 new_history_id_set.begin())); | 499 new_history_id_set.begin())); |
458 history_id_set.swap(new_history_id_set); | 500 history_id_set.swap(new_history_id_set); |
459 } | 501 } |
460 } | 502 } |
461 return history_id_set; | 503 return history_id_set; |
462 } | 504 } |
463 | 505 |
464 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( | 506 HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( |
465 const string16& term) { | 507 const string16& term) { |
466 if (term.empty()) | 508 if (term.empty()) |
467 return HistoryIDSet(); | 509 return HistoryIDSet(); |
468 | 510 |
469 // TODO(mrossetti): Consider optimizing for very common terms such as | 511 // TODO(mrossetti): Consider optimizing for very common terms such as |
470 // 'http[s]', 'www', 'com', etc. Or collect the top 100 most frequently | 512 // 'http[s]', 'www', 'com', etc. Or collect the top 100 most frequently |
471 // occurring words in the user's searches. | 513 // occurring words in the user's searches. |
472 | 514 |
473 size_t term_length = term.length(); | 515 size_t term_length = term.length(); |
474 InMemoryURLIndex::WordIDSet word_id_set; | 516 WordIDSet word_id_set; |
475 if (term_length > 1) { | 517 if (term_length > 1) { |
476 // See if this term or a prefix thereof is present in the cache. | 518 // See if this term or a prefix thereof is present in the cache. |
477 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); | 519 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); |
478 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); | 520 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); |
479 cache_iter != search_term_cache_.end(); ++cache_iter) { | 521 cache_iter != search_term_cache_.end(); ++cache_iter) { |
480 if (StartsWith(term, cache_iter->first, false) && | 522 if (StartsWith(term, cache_iter->first, false) && |
481 (best_prefix == search_term_cache_.end() || | 523 (best_prefix == search_term_cache_.end() || |
482 cache_iter->first.length() > best_prefix->first.length())) | 524 cache_iter->first.length() > best_prefix->first.length())) |
483 best_prefix = cache_iter; | 525 best_prefix = cache_iter; |
484 } | 526 } |
(...skipping 25 matching lines...) Expand all Loading... |
510 | 552 |
511 // Filter for each remaining, unique character in the term. | 553 // Filter for each remaining, unique character in the term. |
512 Char16Set leftover_chars = Char16SetFromString16(leftovers); | 554 Char16Set leftover_chars = Char16SetFromString16(leftovers); |
513 Char16Set unique_chars; | 555 Char16Set unique_chars; |
514 std::set_difference(leftover_chars.begin(), leftover_chars.end(), | 556 std::set_difference(leftover_chars.begin(), leftover_chars.end(), |
515 prefix_chars.begin(), prefix_chars.end(), | 557 prefix_chars.begin(), prefix_chars.end(), |
516 std::inserter(unique_chars, unique_chars.begin())); | 558 std::inserter(unique_chars, unique_chars.begin())); |
517 | 559 |
518 // Reduce the word set with any leftover, unprocessed characters. | 560 // Reduce the word set with any leftover, unprocessed characters. |
519 if (!unique_chars.empty()) { | 561 if (!unique_chars.empty()) { |
520 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); | 562 WordIDSet leftover_set( |
| 563 private_data_->WordIDSetForTermChars(unique_chars)); |
521 // We might come up empty on the leftovers. | 564 // We might come up empty on the leftovers. |
522 if (leftover_set.empty()) { | 565 if (leftover_set.empty()) { |
523 search_term_cache_[term] = SearchTermCacheItem(); | 566 search_term_cache_[term] = SearchTermCacheItem(); |
524 return HistoryIDSet(); | 567 return HistoryIDSet(); |
525 } | 568 } |
526 // Or there may not have been a prefix from which to start. | 569 // Or there may not have been a prefix from which to start. |
527 if (prefix_chars.empty()) { | 570 if (prefix_chars.empty()) { |
528 word_id_set.swap(leftover_set); | 571 word_id_set.swap(leftover_set); |
529 } else { | 572 } else { |
530 WordIDSet new_word_id_set; | 573 WordIDSet new_word_id_set; |
531 std::set_intersection(word_id_set.begin(), word_id_set.end(), | 574 std::set_intersection(word_id_set.begin(), word_id_set.end(), |
532 leftover_set.begin(), leftover_set.end(), | 575 leftover_set.begin(), leftover_set.end(), |
533 std::inserter(new_word_id_set, | 576 std::inserter(new_word_id_set, |
534 new_word_id_set.begin())); | 577 new_word_id_set.begin())); |
535 word_id_set.swap(new_word_id_set); | 578 word_id_set.swap(new_word_id_set); |
536 } | 579 } |
537 } | 580 } |
538 | 581 |
539 // We must filter the word list because the resulting word set surely | 582 // We must filter the word list because the resulting word set surely |
540 // contains words which do not have the search term as a substring. | 583 // contains words which do not have the search term as a substring. |
541 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); | 584 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); |
542 word_set_iter != word_id_set.end(); ) { | 585 word_set_iter != word_id_set.end(); ) { |
543 if (word_list_[*word_set_iter].find(term) == string16::npos) | 586 if (private_data_->word_list_[*word_set_iter].find(term) == |
| 587 string16::npos) |
544 word_id_set.erase(word_set_iter++); | 588 word_id_set.erase(word_set_iter++); |
545 else | 589 else |
546 ++word_set_iter; | 590 ++word_set_iter; |
547 } | 591 } |
548 } else { | 592 } else { |
549 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); | 593 word_id_set = |
| 594 private_data_->WordIDSetForTermChars(Char16SetFromString16(term)); |
550 } | 595 } |
551 | 596 |
552 // If any words resulted then we can compose a set of history IDs by unioning | 597 // If any words resulted then we can compose a set of history IDs by unioning |
553 // the sets from each word. | 598 // the sets from each word. |
554 HistoryIDSet history_id_set; | 599 HistoryIDSet history_id_set; |
555 if (!word_id_set.empty()) { | 600 if (!word_id_set.empty()) { |
556 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 601 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); |
557 word_id_iter != word_id_set.end(); ++word_id_iter) { | 602 word_id_iter != word_id_set.end(); ++word_id_iter) { |
558 WordID word_id = *word_id_iter; | 603 WordID word_id = *word_id_iter; |
559 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); | 604 WordIDHistoryMap::iterator word_iter = |
560 if (word_iter != word_id_history_map_.end()) { | 605 private_data_->word_id_history_map_.find(word_id); |
| 606 if (word_iter != private_data_->word_id_history_map_.end()) { |
561 HistoryIDSet& word_history_id_set(word_iter->second); | 607 HistoryIDSet& word_history_id_set(word_iter->second); |
562 history_id_set.insert(word_history_id_set.begin(), | 608 history_id_set.insert(word_history_id_set.begin(), |
563 word_history_id_set.end()); | 609 word_history_id_set.end()); |
564 } | 610 } |
565 } | 611 } |
566 } | 612 } |
567 | 613 |
568 // Record a new cache entry for this word if the term is longer than | 614 // Record a new cache entry for this word if the term is longer than |
569 // a single character. | 615 // a single character. |
570 if (term_length > 1) | 616 if (term_length > 1) |
571 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); | 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); |
572 | 618 |
573 return history_id_set; | 619 return history_id_set; |
574 } | 620 } |
575 | 621 |
576 // Utility Functions | 622 // Utility Functions |
577 | 623 |
578 // static | |
579 InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( | |
580 const string16& uni_string) { | |
581 const size_t kMaxWordLength = 64; | |
582 String16Vector words = WordVectorFromString16(uni_string, false); | |
583 String16Set word_set; | |
584 for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); | |
585 ++iter) | |
586 word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); | |
587 return word_set; | |
588 } | |
589 | |
590 // static | |
591 InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( | |
592 const string16& uni_string, | |
593 bool break_on_space) { | |
594 base::i18n::BreakIterator iter( | |
595 uni_string, | |
596 break_on_space ? base::i18n::BreakIterator::BREAK_SPACE | |
597 : base::i18n::BreakIterator::BREAK_WORD); | |
598 String16Vector words; | |
599 if (!iter.Init()) | |
600 return words; | |
601 while (iter.Advance()) { | |
602 if (break_on_space || iter.IsWord()) { | |
603 string16 word = iter.GetString(); | |
604 if (break_on_space) | |
605 TrimWhitespace(word, TRIM_ALL, &word); | |
606 if (!word.empty()) | |
607 words.push_back(word); | |
608 } | |
609 } | |
610 return words; | |
611 } | |
612 | |
613 // static | |
614 InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( | |
615 const string16& term) { | |
616 Char16Set characters; | |
617 for (string16::const_iterator iter = term.begin(); iter != term.end(); | |
618 ++iter) | |
619 characters.insert(*iter); | |
620 return characters; | |
621 } | |
622 | |
623 void InMemoryURLIndex::AddWordToIndex(const string16& term, | 624 void InMemoryURLIndex::AddWordToIndex(const string16& term, |
624 HistoryID history_id) { | 625 HistoryID history_id) { |
625 WordMap::iterator word_pos = word_map_.find(term); | 626 WordMap::iterator word_pos = private_data_->word_map_.find(term); |
626 if (word_pos != word_map_.end()) | 627 if (word_pos != private_data_->word_map_.end()) |
627 UpdateWordHistory(word_pos->second, history_id); | 628 UpdateWordHistory(word_pos->second, history_id); |
628 else | 629 else |
629 AddWordHistory(term, history_id); | 630 AddWordHistory(term, history_id); |
630 } | 631 } |
631 | 632 |
632 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { | 633 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { |
633 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); | 634 WordIDHistoryMap::iterator history_pos = |
634 DCHECK(history_pos != word_id_history_map_.end()); | 635 private_data_->word_id_history_map_.find(word_id); |
635 HistoryIDSet& history_id_set(history_pos->second); | 636 DCHECK(history_pos != private_data_->word_id_history_map_.end()); |
636 history_id_set.insert(history_id); | 637 HistoryIDSet& history_id_set(history_pos->second); |
| 638 history_id_set.insert(history_id); |
| 639 private_data_->AddToHistoryIDWordMap(history_id, word_id); |
637 } | 640 } |
638 | 641 |
639 // Add a new word to the word list and the word map, and then create a | 642 // Add a new word to the word list and the word map, and then create a |
640 // new entry in the word/history map. | 643 // new entry in the word/history map. |
641 void InMemoryURLIndex::AddWordHistory(const string16& term, | 644 void InMemoryURLIndex::AddWordHistory(const string16& term, |
642 HistoryID history_id) { | 645 HistoryID history_id) { |
643 word_list_.push_back(term); | 646 URLIndexPrivateData& private_data(*(private_data_.get())); |
644 WordID word_id = word_list_.size() - 1; | 647 WordID word_id = private_data.word_list_.size(); |
645 word_map_[term] = word_id; | 648 if (private_data.available_words_.empty()) { |
| 649 private_data.word_list_.push_back(term); |
| 650 } else { |
| 651 word_id = *(private_data.available_words_.begin()); |
| 652 private_data.word_list_[word_id] = term; |
| 653 private_data.available_words_.erase(word_id); |
| 654 } |
| 655 private_data.word_map_[term] = word_id; |
| 656 |
646 HistoryIDSet history_id_set; | 657 HistoryIDSet history_id_set; |
647 history_id_set.insert(history_id); | 658 history_id_set.insert(history_id); |
648 word_id_history_map_[word_id] = history_id_set; | 659 private_data.word_id_history_map_[word_id] = history_id_set; |
| 660 private_data.AddToHistoryIDWordMap(history_id, word_id); |
| 661 |
649 // For each character in the newly added word (i.e. a word that is not | 662 // For each character in the newly added word (i.e. a word that is not |
650 // already in the word index), add the word to the character index. | 663 // already in the word index), add the word to the character index. |
651 Char16Set characters = Char16SetFromString16(term); | 664 Char16Set characters = Char16SetFromString16(term); |
652 for (Char16Set::iterator uni_char_iter = characters.begin(); | 665 for (Char16Set::iterator uni_char_iter = characters.begin(); |
653 uni_char_iter != characters.end(); ++uni_char_iter) { | 666 uni_char_iter != characters.end(); ++uni_char_iter) { |
654 char16 uni_char = *uni_char_iter; | 667 char16 uni_char = *uni_char_iter; |
655 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); | 668 CharWordIDMap::iterator char_iter = |
656 if (char_iter != char_word_map_.end()) { | 669 private_data.char_word_map_.find(uni_char); |
| 670 if (char_iter != private_data.char_word_map_.end()) { |
657 // Update existing entry in the char/word index. | 671 // Update existing entry in the char/word index. |
658 WordIDSet& word_id_set(char_iter->second); | 672 WordIDSet& word_id_set(char_iter->second); |
659 word_id_set.insert(word_id); | 673 word_id_set.insert(word_id); |
660 } else { | 674 } else { |
661 // Create a new entry in the char/word index. | 675 // Create a new entry in the char/word index. |
662 WordIDSet word_id_set; | 676 WordIDSet word_id_set; |
663 word_id_set.insert(word_id); | 677 word_id_set.insert(word_id); |
664 char_word_map_[uni_char] = word_id_set; | 678 private_data.char_word_map_[uni_char] = word_id_set; |
665 } | 679 } |
666 } | 680 } |
667 } | 681 } |
668 | 682 |
669 InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( | |
670 const Char16Set& term_chars) { | |
671 WordIDSet word_id_set; | |
672 for (Char16Set::const_iterator c_iter = term_chars.begin(); | |
673 c_iter != term_chars.end(); ++c_iter) { | |
674 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); | |
675 if (char_iter == char_word_map_.end()) { | |
676 // A character was not found so there are no matching results: bail. | |
677 word_id_set.clear(); | |
678 break; | |
679 } | |
680 WordIDSet& char_word_id_set(char_iter->second); | |
681 // It is possible for there to no longer be any words associated with | |
682 // a particular character. Give up in that case. | |
683 if (char_word_id_set.empty()) { | |
684 word_id_set.clear(); | |
685 break; | |
686 } | |
687 | |
688 if (c_iter == term_chars.begin()) { | |
689 // The first character's results become the base set of results. | |
690 word_id_set = char_word_id_set; | |
691 } else { | |
692 // Subsequent character results get intersected in. | |
693 WordIDSet new_word_id_set; | |
694 std::set_intersection(word_id_set.begin(), word_id_set.end(), | |
695 char_word_id_set.begin(), char_word_id_set.end(), | |
696 std::inserter(new_word_id_set, | |
697 new_word_id_set.begin())); | |
698 word_id_set.swap(new_word_id_set); | |
699 } | |
700 } | |
701 return word_id_set; | |
702 } | |
703 | |
704 // static | 683 // static |
705 TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, | 684 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. |
706 const string16& string, | |
707 int term_num) { | |
708 const size_t kMaxCompareLength = 2048; | |
709 const string16& short_string = (string.length() > kMaxCompareLength) ? | |
710 string.substr(0, kMaxCompareLength) : string; | |
711 TermMatches matches; | |
712 for (size_t location = short_string.find(term); location != string16::npos; | |
713 location = short_string.find(term, location + 1)) { | |
714 matches.push_back(TermMatch(term_num, location, term.length())); | |
715 } | |
716 return matches; | |
717 } | |
718 | |
719 // static | |
720 TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { | |
721 if (matches.empty()) | |
722 return matches; | |
723 TermMatches sorted_matches = matches; | |
724 std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); | |
725 TermMatches clean_matches; | |
726 TermMatch last_match = sorted_matches[0]; | |
727 clean_matches.push_back(last_match); | |
728 for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; | |
729 iter != sorted_matches.end(); ++iter) { | |
730 if (iter->offset >= last_match.offset + last_match.length) { | |
731 last_match = *iter; | |
732 clean_matches.push_back(last_match); | |
733 } | |
734 } | |
735 return clean_matches; | |
736 } | |
737 | |
738 // static | |
739 std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( | |
740 const TermMatches& matches) { | |
741 std::vector<size_t> offsets; | |
742 for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) | |
743 offsets.push_back(i->offset); | |
744 return offsets; | |
745 } | |
746 | |
747 // static | |
748 TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( | |
749 const TermMatches& matches, | |
750 const std::vector<size_t>& offsets) { | |
751 DCHECK_EQ(matches.size(), offsets.size()); | |
752 TermMatches new_matches; | |
753 std::vector<size_t>::const_iterator offset_iter = offsets.begin(); | |
754 for (TermMatches::const_iterator term_iter = matches.begin(); | |
755 term_iter != matches.end(); ++term_iter, ++offset_iter) { | |
756 if (*offset_iter != string16::npos) { | |
757 TermMatch new_match(*term_iter); | |
758 new_match.offset = *offset_iter; | |
759 new_matches.push_back(new_match); | |
760 } | |
761 } | |
762 return new_matches; | |
763 } | |
764 | |
765 // static | |
766 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( | 685 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( |
767 const URLRow& row, | 686 const URLRow& row, |
768 const String16Vector& terms) { | 687 const String16Vector& terms) { |
769 ScoredHistoryMatch match(row); | 688 ScoredHistoryMatch match(row); |
770 GURL gurl = row.url(); | 689 GURL gurl = row.url(); |
771 if (!gurl.is_valid()) | 690 if (!gurl.is_valid()) |
772 return match; | 691 return match; |
773 | 692 |
774 // Figure out where each search term appears in the URL and/or page title | 693 // Figure out where each search term appears in the URL and/or page title |
775 // so that we can score as well as provide autocomplete highlighting. | 694 // so that we can score as well as provide autocomplete highlighting. |
776 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); | 695 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); |
777 string16 title = base::i18n::ToLower(row.title()); | 696 string16 title = base::i18n::ToLower(row.title()); |
778 int term_num = 0; | 697 int term_num = 0; |
779 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); | 698 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); |
780 ++iter, ++term_num) { | 699 ++iter, ++term_num) { |
781 string16 term = *iter; | 700 string16 term = *iter; |
782 TermMatches url_term_matches = MatchTermInString(term, url, term_num); | 701 TermMatches url_term_matches = MatchTermInString(term, url, term_num); |
783 TermMatches title_term_matches = MatchTermInString(term, title, term_num); | 702 TermMatches title_term_matches = MatchTermInString(term, title, term_num); |
784 if (url_term_matches.empty() && title_term_matches.empty()) | 703 if (url_term_matches.empty() && title_term_matches.empty()) |
785 return match; // A term was not found in either URL or title - reject. | 704 return match; // A term was not found in either URL or title - reject. |
786 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), | 705 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), |
787 url_term_matches.end()); | 706 url_term_matches.end()); |
788 match.title_matches.insert(match.title_matches.end(), | 707 match.title_matches.insert(match.title_matches.end(), |
789 title_term_matches.begin(), | 708 title_term_matches.begin(), |
790 title_term_matches.end()); | 709 title_term_matches.end()); |
791 } | 710 } |
792 | 711 |
793 // Sort matches by offset and eliminate any which overlap. | 712 // Sort matches by offset and eliminate any which overlap. |
794 match.url_matches = SortAndDeoverlap(match.url_matches); | 713 match.url_matches = SortAndDeoverlapMatches(match.url_matches); |
795 match.title_matches = SortAndDeoverlap(match.title_matches); | 714 match.title_matches = SortAndDeoverlapMatches(match.title_matches); |
796 | 715 |
797 // We should not (currently) inline autocomplete a result unless both of the | 716 // We should not (currently) inline autocomplete a result unless both of the |
798 // following are true: | 717 // following are true: |
799 // * There is exactly one substring match in the URL, and | 718 // * There is exactly one substring match in the URL, and |
800 // * The one URL match starts at the beginning of the URL. | 719 // * The one URL match starts at the beginning of the URL. |
801 match.can_inline = | 720 match.can_inline = |
802 match.url_matches.size() == 1 && match.url_matches[0].offset == 0; | 721 match.url_matches.size() == 1 && match.url_matches[0].offset == 0; |
803 | 722 |
804 // Get partial scores based on term matching. Note that the score for | 723 // Get partial scores based on term matching. Note that the score for |
805 // each of the URL and title are adjusted by the fraction of the | 724 // each of the URL and title are adjusted by the fraction of the |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
899 | 818 |
900 // Scale the sum of the three components above into a single score component | 819 // Scale the sum of the three components above into a single score component |
901 // on the same scale as that used in ScoredMatchForURL(). | 820 // on the same scale as that used in ScoredMatchForURL(). |
902 return ScoreForValue(raw_score, kTermScoreLevel); | 821 return ScoreForValue(raw_score, kTermScoreLevel); |
903 } | 822 } |
904 | 823 |
905 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( | 824 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( |
906 const InMemoryURLIndex& index, | 825 const InMemoryURLIndex& index, |
907 const String16Vector& lower_terms) | 826 const String16Vector& lower_terms) |
908 : index_(index), | 827 : index_(index), |
909 lower_terms_(lower_terms) { | 828 lower_terms_(lower_terms) {} |
910 } | |
911 | 829 |
912 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} | 830 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} |
913 | 831 |
914 void InMemoryURLIndex::AddHistoryMatch::operator()( | 832 void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) { |
915 const InMemoryURLIndex::HistoryID history_id) { | |
916 HistoryInfoMap::const_iterator hist_pos = | 833 HistoryInfoMap::const_iterator hist_pos = |
917 index_.history_info_map_.find(history_id); | 834 index_.private_data_->history_info_map_.find(history_id); |
918 // Note that a history_id may be present in the word_id_history_map_ yet not | 835 // Note that a history_id may be present in the word_id_history_map_ yet not |
919 // be found in the history_info_map_. This occurs when an item has been | 836 // be found in the history_info_map_. This occurs when an item has been |
920 // deleted by the user or the item no longer qualifies as a quick result. | 837 // deleted by the user or the item no longer qualifies as a quick result. |
921 if (hist_pos != index_.history_info_map_.end()) { | 838 if (hist_pos != index_.private_data_->history_info_map_.end()) { |
922 const URLRow& hist_item = hist_pos->second; | 839 const URLRow& hist_item = hist_pos->second; |
923 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); | 840 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); |
924 if (match.raw_score > 0) | 841 if (match.raw_score > 0) |
925 scored_matches_.push_back(match); | 842 scored_matches_.push_back(match); |
926 } | 843 } |
927 } | 844 } |
928 | 845 |
929 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { | 846 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { |
930 if (history_dir_.empty()) | 847 if (history_dir_.empty()) |
931 return false; | 848 return false; |
932 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); | 849 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); |
933 return true; | 850 return true; |
934 } | 851 } |
935 | 852 |
936 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { | 853 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { |
937 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); | 854 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); |
938 } | 855 } |
939 | 856 |
940 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { | 857 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { |
941 DCHECK(cache); | 858 DCHECK(cache); |
942 cache->set_timestamp(base::Time::Now().ToInternalValue()); | 859 cache->set_timestamp(base::Time::Now().ToInternalValue()); |
943 cache->set_history_item_count(history_item_count_); | 860 // history_item_count_ is no longer used but rather than change the protobuf |
| 861 // definition use a placeholder. This will go away with the switch to SQLite. |
| 862 cache->set_history_item_count(0); |
944 SaveWordList(cache); | 863 SaveWordList(cache); |
945 SaveWordMap(cache); | 864 SaveWordMap(cache); |
946 SaveCharWordMap(cache); | 865 SaveCharWordMap(cache); |
947 SaveWordIDHistoryMap(cache); | 866 SaveWordIDHistoryMap(cache); |
948 SaveHistoryInfoMap(cache); | 867 SaveHistoryInfoMap(cache); |
949 } | 868 } |
950 | 869 |
951 bool InMemoryURLIndex::RestorePrivateData( | 870 bool InMemoryURLIndex::RestorePrivateData( |
952 const InMemoryURLIndexCacheItem& cache) { | 871 const InMemoryURLIndexCacheItem& cache) { |
953 last_saved_ = base::Time::FromInternalValue(cache.timestamp()); | 872 last_saved_ = base::Time::FromInternalValue(cache.timestamp()); |
954 history_item_count_ = cache.history_item_count(); | 873 return RestoreWordList(cache) && RestoreWordMap(cache) && |
955 return (history_item_count_ == 0) || (RestoreWordList(cache) && | 874 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
956 RestoreWordMap(cache) && RestoreCharWordMap(cache) && | 875 RestoreHistoryInfoMap(cache); |
957 RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); | |
958 } | 876 } |
959 | 877 |
960 | |
961 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { | 878 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
962 if (word_list_.empty()) | 879 if (private_data_->word_list_.empty()) |
963 return; | 880 return; |
964 WordListItem* list_item = cache->mutable_word_list(); | 881 WordListItem* list_item = cache->mutable_word_list(); |
965 list_item->set_word_count(word_list_.size()); | 882 list_item->set_word_count(private_data_->word_list_.size()); |
966 for (String16Vector::const_iterator iter = word_list_.begin(); | 883 for (String16Vector::const_iterator iter = private_data_->word_list_.begin(); |
967 iter != word_list_.end(); ++iter) | 884 iter != private_data_->word_list_.end(); ++iter) |
968 list_item->add_word(UTF16ToUTF8(*iter)); | 885 list_item->add_word(UTF16ToUTF8(*iter)); |
969 } | 886 } |
970 | 887 |
971 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { | 888 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { |
972 if (!cache.has_word_list()) | 889 if (!cache.has_word_list()) |
973 return false; | 890 return false; |
974 const WordListItem& list_item(cache.word_list()); | 891 const WordListItem& list_item(cache.word_list()); |
975 uint32 expected_item_count = list_item.word_count(); | 892 uint32 expected_item_count = list_item.word_count(); |
976 uint32 actual_item_count = list_item.word_size(); | 893 uint32 actual_item_count = list_item.word_size(); |
977 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 894 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
978 return false; | 895 return false; |
979 const RepeatedPtrField<std::string>& words(list_item.word()); | 896 const RepeatedPtrField<std::string>& words(list_item.word()); |
980 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); | 897 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); |
981 iter != words.end(); ++iter) | 898 iter != words.end(); ++iter) |
982 word_list_.push_back(UTF8ToUTF16(*iter)); | 899 private_data_->word_list_.push_back(UTF8ToUTF16(*iter)); |
983 return true; | 900 return true; |
984 } | 901 } |
985 | 902 |
986 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { | 903 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { |
987 if (word_map_.empty()) | 904 if (private_data_->word_map_.empty()) |
988 return; | 905 return; |
989 WordMapItem* map_item = cache->mutable_word_map(); | 906 WordMapItem* map_item = cache->mutable_word_map(); |
990 map_item->set_item_count(word_map_.size()); | 907 map_item->set_item_count(private_data_->word_map_.size()); |
991 for (WordMap::const_iterator iter = word_map_.begin(); | 908 for (WordMap::const_iterator iter = private_data_->word_map_.begin(); |
992 iter != word_map_.end(); ++iter) { | 909 iter != private_data_->word_map_.end(); ++iter) { |
993 WordMapEntry* map_entry = map_item->add_word_map_entry(); | 910 WordMapEntry* map_entry = map_item->add_word_map_entry(); |
994 map_entry->set_word(UTF16ToUTF8(iter->first)); | 911 map_entry->set_word(UTF16ToUTF8(iter->first)); |
995 map_entry->set_word_id(iter->second); | 912 map_entry->set_word_id(iter->second); |
996 } | 913 } |
997 } | 914 } |
998 | 915 |
999 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { | 916 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { |
1000 if (!cache.has_word_map()) | 917 if (!cache.has_word_map()) |
1001 return false; | 918 return false; |
1002 const WordMapItem& list_item(cache.word_map()); | 919 const WordMapItem& list_item(cache.word_map()); |
1003 uint32 expected_item_count = list_item.item_count(); | 920 uint32 expected_item_count = list_item.item_count(); |
1004 uint32 actual_item_count = list_item.word_map_entry_size(); | 921 uint32 actual_item_count = list_item.word_map_entry_size(); |
1005 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 922 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1006 return false; | 923 return false; |
1007 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); | 924 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); |
1008 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); | 925 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); |
1009 iter != entries.end(); ++iter) | 926 iter != entries.end(); ++iter) |
1010 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); | 927 private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); |
1011 return true; | 928 return true; |
1012 } | 929 } |
1013 | 930 |
1014 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { | 931 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { |
1015 if (char_word_map_.empty()) | 932 if (private_data_->char_word_map_.empty()) |
1016 return; | 933 return; |
1017 CharWordMapItem* map_item = cache->mutable_char_word_map(); | 934 CharWordMapItem* map_item = cache->mutable_char_word_map(); |
1018 map_item->set_item_count(char_word_map_.size()); | 935 map_item->set_item_count(private_data_->char_word_map_.size()); |
1019 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); | 936 for (CharWordIDMap::const_iterator iter = |
1020 iter != char_word_map_.end(); ++iter) { | 937 private_data_->char_word_map_.begin(); |
| 938 iter != private_data_->char_word_map_.end(); ++iter) { |
1021 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); | 939 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); |
1022 map_entry->set_char_16(iter->first); | 940 map_entry->set_char_16(iter->first); |
1023 const WordIDSet& word_id_set(iter->second); | 941 const WordIDSet& word_id_set(iter->second); |
1024 map_entry->set_item_count(word_id_set.size()); | 942 map_entry->set_item_count(word_id_set.size()); |
1025 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); | 943 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); |
1026 set_iter != word_id_set.end(); ++set_iter) | 944 set_iter != word_id_set.end(); ++set_iter) |
1027 map_entry->add_word_id(*set_iter); | 945 map_entry->add_word_id(*set_iter); |
1028 } | 946 } |
1029 } | 947 } |
1030 | 948 |
(...skipping 13 matching lines...) Expand all Loading... |
1044 expected_item_count = iter->item_count(); | 962 expected_item_count = iter->item_count(); |
1045 actual_item_count = iter->word_id_size(); | 963 actual_item_count = iter->word_id_size(); |
1046 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 964 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1047 return false; | 965 return false; |
1048 char16 uni_char = static_cast<char16>(iter->char_16()); | 966 char16 uni_char = static_cast<char16>(iter->char_16()); |
1049 WordIDSet word_id_set; | 967 WordIDSet word_id_set; |
1050 const RepeatedField<int32>& word_ids(iter->word_id()); | 968 const RepeatedField<int32>& word_ids(iter->word_id()); |
1051 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); | 969 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); |
1052 jiter != word_ids.end(); ++jiter) | 970 jiter != word_ids.end(); ++jiter) |
1053 word_id_set.insert(*jiter); | 971 word_id_set.insert(*jiter); |
1054 char_word_map_[uni_char] = word_id_set; | 972 private_data_->char_word_map_[uni_char] = word_id_set; |
1055 } | 973 } |
1056 return true; | 974 return true; |
1057 } | 975 } |
1058 | 976 |
1059 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) | 977 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) |
1060 const { | 978 const { |
1061 if (word_id_history_map_.empty()) | 979 if (private_data_->word_id_history_map_.empty()) |
1062 return; | 980 return; |
1063 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); | 981 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); |
1064 map_item->set_item_count(word_id_history_map_.size()); | 982 map_item->set_item_count(private_data_->word_id_history_map_.size()); |
1065 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); | 983 for (WordIDHistoryMap::const_iterator iter = |
1066 iter != word_id_history_map_.end(); ++iter) { | 984 private_data_->word_id_history_map_.begin(); |
| 985 iter != private_data_->word_id_history_map_.end(); ++iter) { |
1067 WordIDHistoryMapEntry* map_entry = | 986 WordIDHistoryMapEntry* map_entry = |
1068 map_item->add_word_id_history_map_entry(); | 987 map_item->add_word_id_history_map_entry(); |
1069 map_entry->set_word_id(iter->first); | 988 map_entry->set_word_id(iter->first); |
1070 const HistoryIDSet& history_id_set(iter->second); | 989 const HistoryIDSet& history_id_set(iter->second); |
1071 map_entry->set_item_count(history_id_set.size()); | 990 map_entry->set_item_count(history_id_set.size()); |
1072 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); | 991 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); |
1073 set_iter != history_id_set.end(); ++set_iter) | 992 set_iter != history_id_set.end(); ++set_iter) |
1074 map_entry->add_history_id(*set_iter); | 993 map_entry->add_history_id(*set_iter); |
1075 } | 994 } |
1076 } | 995 } |
(...skipping 12 matching lines...) Expand all Loading... |
1089 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = | 1008 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = |
1090 entries.begin(); iter != entries.end(); ++iter) { | 1009 entries.begin(); iter != entries.end(); ++iter) { |
1091 expected_item_count = iter->item_count(); | 1010 expected_item_count = iter->item_count(); |
1092 actual_item_count = iter->history_id_size(); | 1011 actual_item_count = iter->history_id_size(); |
1093 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1012 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1094 return false; | 1013 return false; |
1095 WordID word_id = iter->word_id(); | 1014 WordID word_id = iter->word_id(); |
1096 HistoryIDSet history_id_set; | 1015 HistoryIDSet history_id_set; |
1097 const RepeatedField<int64>& history_ids(iter->history_id()); | 1016 const RepeatedField<int64>& history_ids(iter->history_id()); |
1098 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); | 1017 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); |
1099 jiter != history_ids.end(); ++jiter) | 1018 jiter != history_ids.end(); ++jiter) { |
1100 history_id_set.insert(*jiter); | 1019 history_id_set.insert(*jiter); |
1101 word_id_history_map_[word_id] = history_id_set; | 1020 private_data_->AddToHistoryIDWordMap(*jiter, word_id); |
| 1021 } |
| 1022 private_data_->word_id_history_map_[word_id] = history_id_set; |
1102 } | 1023 } |
1103 return true; | 1024 return true; |
1104 } | 1025 } |
1105 | 1026 |
1106 void InMemoryURLIndex::SaveHistoryInfoMap( | 1027 void InMemoryURLIndex::SaveHistoryInfoMap( |
1107 InMemoryURLIndexCacheItem* cache) const { | 1028 InMemoryURLIndexCacheItem* cache) const { |
1108 if (history_info_map_.empty()) | 1029 if (private_data_->history_info_map_.empty()) |
1109 return; | 1030 return; |
1110 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 1031 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
1111 map_item->set_item_count(history_info_map_.size()); | 1032 map_item->set_item_count(private_data_->history_info_map_.size()); |
1112 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1033 for (HistoryInfoMap::const_iterator iter = |
1113 iter != history_info_map_.end(); ++iter) { | 1034 private_data_->history_info_map_.begin(); |
| 1035 iter != private_data_->history_info_map_.end(); ++iter) { |
1114 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 1036 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
1115 map_entry->set_history_id(iter->first); | 1037 map_entry->set_history_id(iter->first); |
1116 const URLRow& url_row(iter->second); | 1038 const URLRow& url_row(iter->second); |
1117 // Note: We only save information that contributes to the index so there | 1039 // Note: We only save information that contributes to the index so there |
1118 // is no need to save search_term_cache_ (not persistent), | 1040 // is no need to save search_term_cache_ (not persistent), |
1119 // languages_, etc. | 1041 // languages_, etc. |
1120 map_entry->set_visit_count(url_row.visit_count()); | 1042 map_entry->set_visit_count(url_row.visit_count()); |
1121 map_entry->set_typed_count(url_row.typed_count()); | 1043 map_entry->set_typed_count(url_row.typed_count()); |
1122 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1044 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1123 map_entry->set_url(url_row.url().spec()); | 1045 map_entry->set_url(url_row.url().spec()); |
(...skipping 17 matching lines...) Expand all Loading... |
1141 HistoryID history_id = iter->history_id(); | 1063 HistoryID history_id = iter->history_id(); |
1142 GURL url(iter->url()); | 1064 GURL url(iter->url()); |
1143 URLRow url_row(url, history_id); | 1065 URLRow url_row(url, history_id); |
1144 url_row.set_visit_count(iter->visit_count()); | 1066 url_row.set_visit_count(iter->visit_count()); |
1145 url_row.set_typed_count(iter->typed_count()); | 1067 url_row.set_typed_count(iter->typed_count()); |
1146 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1068 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); |
1147 if (iter->has_title()) { | 1069 if (iter->has_title()) { |
1148 string16 title(UTF8ToUTF16(iter->title())); | 1070 string16 title(UTF8ToUTF16(iter->title())); |
1149 url_row.set_title(title); | 1071 url_row.set_title(title); |
1150 } | 1072 } |
1151 history_info_map_[history_id] = url_row; | 1073 private_data_->history_info_map_[history_id] = url_row; |
1152 } | 1074 } |
1153 return true; | 1075 return true; |
1154 } | 1076 } |
1155 | 1077 |
1156 } // namespace history | 1078 } // namespace history |
OLD | NEW |