OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/in_memory_url_index.h" | 5 #include "chrome/browser/history/in_memory_url_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
11 #include <numeric> | 11 #include <numeric> |
12 | 12 |
13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
14 #include "base/i18n/break_iterator.h" | |
15 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
16 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
17 #include "base/string_util.h" | |
18 #include "base/threading/thread_restrictions.h" | 16 #include "base/threading/thread_restrictions.h" |
19 #include "base/time.h" | 17 #include "base/time.h" |
20 #include "base/utf_string_conversions.h" | 18 #include "base/utf_string_conversions.h" |
21 #include "chrome/browser/autocomplete/autocomplete.h" | 19 #include "chrome/browser/autocomplete/autocomplete.h" |
22 #include "chrome/browser/autocomplete/history_provider_util.h" | 20 #include "chrome/browser/autocomplete/history_provider_util.h" |
| 21 #include "chrome/browser/history/history_notifications.h" |
23 #include "chrome/browser/history/url_database.h" | 22 #include "chrome/browser/history/url_database.h" |
24 #include "chrome/browser/profiles/profile.h" | 23 #include "chrome/browser/profiles/profile.h" |
| 24 #include "chrome/common/chrome_notification_types.h" |
25 #include "chrome/common/url_constants.h" | 25 #include "chrome/common/url_constants.h" |
| 26 #include "content/common/notification_details.h" |
| 27 #include "content/common/notification_source.h" |
26 #include "googleurl/src/url_parse.h" | 28 #include "googleurl/src/url_parse.h" |
27 #include "googleurl/src/url_util.h" | 29 #include "googleurl/src/url_util.h" |
28 #include "net/base/escape.h" | 30 #include "net/base/escape.h" |
29 #include "net/base/net_util.h" | 31 #include "net/base/net_util.h" |
30 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 32 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
31 #include "ui/base/l10n/l10n_util.h" | 33 #include "ui/base/l10n/l10n_util.h" |
32 | 34 |
33 using google::protobuf::RepeatedField; | 35 using google::protobuf::RepeatedField; |
34 using google::protobuf::RepeatedPtrField; | 36 using google::protobuf::RepeatedPtrField; |
35 using in_memory_url_index::InMemoryURLIndexCacheItem; | 37 using in_memory_url_index::InMemoryURLIndexCacheItem; |
(...skipping 16 matching lines...) Expand all Loading... |
52 HistoryInfoMapEntry; | 54 HistoryInfoMapEntry; |
53 | 55 |
54 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; | 56 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; |
55 | 57 |
56 // Score ranges used to get a 'base' score for each of the scoring factors | 58 // Score ranges used to get a 'base' score for each of the scoring factors |
57 // (such as recency of last visit, times visited, times the URL was typed, | 59 // (such as recency of last visit, times visited, times the URL was typed, |
58 // and the quality of the string match). There is a matching value range for | 60 // and the quality of the string match). There is a matching value range for |
59 // each of these scores for each factor. | 61 // each of these scores for each factor. |
60 const int kScoreRank[] = { 1425, 1200, 900, 400 }; | 62 const int kScoreRank[] = { 1425, 1200, 900, 400 }; |
61 | 63 |
62 ScoredHistoryMatch::ScoredHistoryMatch() | |
63 : raw_score(0), | |
64 can_inline(false) {} | |
65 | |
66 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) | |
67 : HistoryMatch(url_info, 0, false, false), | |
68 raw_score(0), | |
69 can_inline(false) {} | |
70 | |
71 ScoredHistoryMatch::~ScoredHistoryMatch() {} | |
72 | |
73 // Comparison function for sorting ScoredMatches by their scores. | |
74 bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, | |
75 const ScoredHistoryMatch& m2) { | |
76 return m1.raw_score >= m2.raw_score; | |
77 } | |
78 | |
79 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( | 64 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( |
80 const WordIDSet& word_id_set, | 65 const WordIDSet& word_id_set, |
81 const HistoryIDSet& history_id_set) | 66 const HistoryIDSet& history_id_set) |
82 : word_id_set_(word_id_set), | 67 : word_id_set_(word_id_set), |
83 history_id_set_(history_id_set), | 68 history_id_set_(history_id_set), |
84 used_(true) {} | 69 used_(true) {} |
85 | 70 |
86 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() | 71 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() |
87 : used_(true) {} | 72 : used_(true) {} |
88 | 73 |
89 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} | 74 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} |
90 | 75 |
91 // Comparison function for sorting TermMatches by their offsets. | |
92 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { | |
93 return m1.offset < m2.offset; | |
94 } | |
95 | |
96 // Comparison function for sorting search terms by descending length. | 76 // Comparison function for sorting search terms by descending length. |
97 bool LengthGreater(const string16& string_a, const string16& string_b) { | 77 bool LengthGreater(const string16& string_a, const string16& string_b) { |
98 return string_a.length() > string_b.length(); | 78 return string_a.length() > string_b.length(); |
99 } | 79 } |
100 | 80 |
101 // std::accumulate helper function to add up TermMatches' lengths. | 81 // std::accumulate helper function to add up TermMatches' lengths. |
102 int AccumulateMatchLength(int total, const TermMatch& match) { | 82 int AccumulateMatchLength(int total, const TermMatch& match) { |
103 return total + match.length; | 83 return total + match.length; |
104 } | 84 } |
105 | 85 |
(...skipping 22 matching lines...) Expand all Loading... |
128 return 0; | 108 return 0; |
129 int score = kScoreRank[i]; | 109 int score = kScoreRank[i]; |
130 if (i > 0) { | 110 if (i > 0) { |
131 score += (value - value_ranks[i]) * | 111 score += (value - value_ranks[i]) * |
132 (kScoreRank[i - 1] - kScoreRank[i]) / | 112 (kScoreRank[i - 1] - kScoreRank[i]) / |
133 (value_ranks[i - 1] - value_ranks[i]); | 113 (value_ranks[i - 1] - value_ranks[i]); |
134 } | 114 } |
135 return score; | 115 return score; |
136 } | 116 } |
137 | 117 |
138 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) | 118 InMemoryURLIndex::InMemoryURLIndex(Profile* profile, |
| 119 const FilePath& history_dir) |
139 : history_dir_(history_dir), | 120 : history_dir_(history_dir), |
140 history_item_count_(0) { | 121 private_data_(new URLIndexPrivateData) { |
141 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 122 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
| 123 if (profile) { |
| 124 Source<Profile> source(profile); |
| 125 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source); |
| 126 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED, |
| 127 source); |
| 128 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source); |
| 129 } |
142 } | 130 } |
143 | 131 |
144 // Called only by unit tests. | 132 // Called only by unit tests. |
145 InMemoryURLIndex::InMemoryURLIndex() | 133 InMemoryURLIndex::InMemoryURLIndex() |
146 : history_item_count_(0) { | 134 : private_data_(new URLIndexPrivateData) { |
147 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 135 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
148 } | 136 } |
149 | 137 |
150 InMemoryURLIndex::~InMemoryURLIndex() {} | 138 InMemoryURLIndex::~InMemoryURLIndex() {} |
151 | 139 |
152 // static | 140 // static |
153 void InMemoryURLIndex::InitializeSchemeWhitelist( | 141 void InMemoryURLIndex::InitializeSchemeWhitelist( |
154 std::set<std::string>* whitelist) { | 142 std::set<std::string>* whitelist) { |
155 DCHECK(whitelist); | 143 DCHECK(whitelist); |
156 whitelist->insert(std::string(chrome::kAboutScheme)); | 144 whitelist->insert(std::string(chrome::kAboutScheme)); |
157 whitelist->insert(std::string(chrome::kChromeUIScheme)); | 145 whitelist->insert(std::string(chrome::kChromeUIScheme)); |
158 whitelist->insert(std::string(chrome::kFileScheme)); | 146 whitelist->insert(std::string(chrome::kFileScheme)); |
159 whitelist->insert(std::string(chrome::kFtpScheme)); | 147 whitelist->insert(std::string(chrome::kFtpScheme)); |
160 whitelist->insert(std::string(chrome::kHttpScheme)); | 148 whitelist->insert(std::string(chrome::kHttpScheme)); |
161 whitelist->insert(std::string(chrome::kHttpsScheme)); | 149 whitelist->insert(std::string(chrome::kHttpsScheme)); |
162 whitelist->insert(std::string(chrome::kMailToScheme)); | 150 whitelist->insert(std::string(chrome::kMailToScheme)); |
163 } | 151 } |
164 | 152 |
165 // Indexing | 153 // Indexing |
166 | 154 |
167 bool InMemoryURLIndex::Init(history::URLDatabase* history_db, | 155 bool InMemoryURLIndex::Init(URLDatabase* history_db, |
168 const std::string& languages) { | 156 const std::string& languages) { |
169 // TODO(mrossetti): Register for profile/language change notifications. | 157 // TODO(mrossetti): Register for profile/language change notifications. |
170 languages_ = languages; | 158 languages_ = languages; |
171 return ReloadFromHistory(history_db, false); | 159 return ReloadFromHistory(history_db, false); |
172 } | 160 } |
173 | 161 |
174 void InMemoryURLIndex::ShutDown() { | 162 void InMemoryURLIndex::ShutDown() { |
| 163 registrar_.RemoveAll(); |
175 // Write our cache. | 164 // Write our cache. |
176 SaveToCacheFile(); | 165 SaveToCacheFile(); |
177 } | 166 } |
178 | 167 |
| 168 void InMemoryURLIndex::Observe(int type, |
| 169 const NotificationSource& source, |
| 170 const NotificationDetails& details) { |
| 171 switch (type) { |
| 172 case chrome::NOTIFICATION_HISTORY_URL_VISITED: |
| 173 OnURLVisited(Details<URLVisitedDetails>(details).ptr()); |
| 174 break; |
| 175 case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED: |
| 176 OnURLsModified(Details<history::URLsModifiedDetails>(details).ptr()); |
| 177 break; |
| 178 case chrome::NOTIFICATION_HISTORY_URLS_DELETED: |
| 179 OnURLsDeleted(Details<history::URLsDeletedDetails>(details).ptr()); |
| 180 break; |
| 181 default: |
| 182 // For simplicity, the unit tests send us all notifications, even when |
| 183 // we haven't registered for them, so don't assert here. |
| 184 break; |
| 185 } |
| 186 } |
| 187 |
| 188 void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) { |
| 189 UpdateURL(details->row); |
| 190 } |
| 191 |
| 192 void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) { |
| 193 for (std::vector<history::URLRow>::const_iterator row = |
| 194 details->changed_urls.begin(); |
| 195 row != details->changed_urls.end(); ++row) |
| 196 UpdateURL(*row); |
| 197 } |
| 198 |
| 199 void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) { |
| 200 if (details->all_history) { |
| 201 ClearPrivateData(); |
| 202 } else { |
| 203 for (std::vector<URLRow>::const_iterator row = details->rows.begin(); |
| 204 row != details->rows.end(); ++row) |
| 205 DeleteURL(*row); |
| 206 } |
| 207 } |
| 208 |
179 bool InMemoryURLIndex::IndexRow(const URLRow& row) { | 209 bool InMemoryURLIndex::IndexRow(const URLRow& row) { |
180 const GURL& gurl(row.url()); | 210 const GURL& gurl(row.url()); |
181 | 211 |
182 // Index only URLs with a whitelisted scheme. | 212 // Index only URLs with a whitelisted scheme. |
183 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) | 213 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) |
184 return true; | 214 return true; |
185 | 215 |
| 216 URLID row_id = row.id(); |
| 217 // Strip out username and password before saving and indexing. |
186 string16 url(net::FormatUrl(gurl, languages_, | 218 string16 url(net::FormatUrl(gurl, languages_, |
187 net::kFormatUrlOmitUsernamePassword, | 219 net::kFormatUrlOmitUsernamePassword, |
188 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, | 220 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
189 NULL, NULL, NULL)); | 221 NULL, NULL, NULL)); |
190 | 222 HistoryID history_id = static_cast<HistoryID>(row_id); |
191 HistoryID history_id = static_cast<HistoryID>(row.id()); | 223 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
192 DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); | |
193 | 224 |
194 // Add the row for quick lookup in the history info store. | 225 // Add the row for quick lookup in the history info store. |
195 URLRow new_row(GURL(url), row.id()); | 226 URLRow new_row(GURL(url), row_id); |
196 new_row.set_visit_count(row.visit_count()); | 227 new_row.set_visit_count(row.visit_count()); |
197 new_row.set_typed_count(row.typed_count()); | 228 new_row.set_typed_count(row.typed_count()); |
198 new_row.set_last_visit(row.last_visit()); | 229 new_row.set_last_visit(row.last_visit()); |
199 new_row.set_title(row.title()); | 230 new_row.set_title(row.title()); |
200 history_info_map_[history_id] = new_row; | 231 private_data_->history_info_map_[history_id] = new_row; |
201 | 232 |
| 233 // Index the words contained in the URL and title of the row. |
| 234 AddRowWordsToIndex(new_row); |
| 235 return true; |
| 236 } |
| 237 |
| 238 void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) { |
| 239 HistoryID history_id = static_cast<HistoryID>(row.id()); |
202 // Split URL into individual, unique words then add in the title words. | 240 // Split URL into individual, unique words then add in the title words. |
| 241 const GURL& gurl(row.url()); |
| 242 string16 url(net::FormatUrl(gurl, languages_, |
| 243 net::kFormatUrlOmitUsernamePassword, |
| 244 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
| 245 NULL, NULL, NULL)); |
203 url = base::i18n::ToLower(url); | 246 url = base::i18n::ToLower(url); |
204 String16Set url_words = WordSetFromString16(url); | 247 String16Set url_words = String16SetFromString16(url); |
205 String16Set title_words = WordSetFromString16(row.title()); | 248 String16Set title_words = String16SetFromString16(row.title()); |
206 String16Set words; | 249 String16Set words; |
207 std::set_union(url_words.begin(), url_words.end(), | 250 std::set_union(url_words.begin(), url_words.end(), |
208 title_words.begin(), title_words.end(), | 251 title_words.begin(), title_words.end(), |
209 std::insert_iterator<String16Set>(words, words.begin())); | 252 std::insert_iterator<String16Set>(words, words.begin())); |
210 for (String16Set::iterator word_iter = words.begin(); | 253 for (String16Set::iterator word_iter = words.begin(); |
211 word_iter != words.end(); ++word_iter) | 254 word_iter != words.end(); ++word_iter) |
212 AddWordToIndex(*word_iter, history_id); | 255 AddWordToIndex(*word_iter, history_id); |
213 | 256 |
214 ++history_item_count_; | 257 search_term_cache_.clear(); // Invalidate the term cache. |
215 return true; | 258 } |
| 259 |
| 260 void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) { |
| 261 RemoveRowWordsFromIndex(row); |
| 262 HistoryID history_id = static_cast<HistoryID>(row.id()); |
| 263 private_data_->history_info_map_.erase(history_id); |
| 264 } |
| 265 |
| 266 void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) { |
| 267 // Remove the entries in history_id_word_map_ and word_id_history_map_ for |
| 268 // this row. |
| 269 URLIndexPrivateData& private_data(*(private_data_.get())); |
| 270 HistoryID history_id = static_cast<HistoryID>(row.id()); |
| 271 WordIDSet word_id_set = private_data.history_id_word_map_[history_id]; |
| 272 private_data.history_id_word_map_.erase(history_id); |
| 273 |
| 274 // Reconcile any changes to word usage. |
| 275 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); |
| 276 word_id_iter != word_id_set.end(); ++word_id_iter) { |
| 277 WordID word_id = *word_id_iter; |
| 278 private_data.word_id_history_map_[word_id].erase(history_id); |
| 279 if (!private_data.word_id_history_map_[word_id].empty()) |
| 280 continue; // The word is still in use. |
| 281 |
| 282 // The word is no longer in use. Reconcile any changes to character usage. |
| 283 string16 word = private_data.word_list_[word_id]; |
| 284 Char16Set characters = Char16SetFromString16(word); |
| 285 for (Char16Set::iterator uni_char_iter = characters.begin(); |
| 286 uni_char_iter != characters.end(); ++uni_char_iter) { |
| 287 char16 uni_char = *uni_char_iter; |
| 288 private_data.char_word_map_[uni_char].erase(word_id); |
| 289 if (private_data.char_word_map_[uni_char].empty()) |
| 290 private_data.char_word_map_.erase(uni_char); // No longer in use. |
| 291 } |
| 292 |
| 293 // Complete the removal of references to the word. |
| 294 private_data.word_id_history_map_.erase(word_id); |
| 295 private_data.word_map_.erase(word); |
| 296 private_data.word_list_[word_id] = string16(); |
| 297 private_data.available_words_.insert(word_id); |
| 298 } |
216 } | 299 } |
217 | 300 |
218 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, | 301 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, |
219 bool clear_cache) { | 302 bool clear_cache) { |
220 ClearPrivateData(); | 303 ClearPrivateData(); |
221 | 304 |
222 if (!history_db) | 305 if (!history_db) |
223 return false; | 306 return false; |
224 | 307 |
225 if (clear_cache || !RestoreFromCacheFile()) { | 308 if (clear_cache || !RestoreFromCacheFile()) { |
226 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 309 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
227 // The index has to be built from scratch. | 310 // The index has to be built from scratch. |
228 URLDatabase::URLEnumerator history_enum; | 311 URLDatabase::URLEnumerator history_enum; |
229 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 312 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
230 return false; | 313 return false; |
231 URLRow row; | 314 URLRow row; |
232 while (history_enum.GetNextURL(&row)) { | 315 while (history_enum.GetNextURL(&row)) { |
233 if (!IndexRow(row)) | 316 if (!IndexRow(row)) |
234 return false; | 317 return false; |
235 } | 318 } |
236 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 319 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
237 base::TimeTicks::Now() - beginning_time); | 320 base::TimeTicks::Now() - beginning_time); |
238 SaveToCacheFile(); | 321 SaveToCacheFile(); |
239 } | 322 } |
240 return true; | 323 return true; |
241 } | 324 } |
242 | 325 |
243 void InMemoryURLIndex::ClearPrivateData() { | 326 void InMemoryURLIndex::ClearPrivateData() { |
244 history_item_count_ = 0; | 327 private_data_->Clear(); |
245 word_list_.clear(); | |
246 word_map_.clear(); | |
247 char_word_map_.clear(); | |
248 word_id_history_map_.clear(); | |
249 history_info_map_.clear(); | |
250 search_term_cache_.clear(); | 328 search_term_cache_.clear(); |
251 } | 329 } |
252 | 330 |
253 bool InMemoryURLIndex::RestoreFromCacheFile() { | 331 bool InMemoryURLIndex::RestoreFromCacheFile() { |
254 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 332 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
255 // That is: ensure that the database has not been modified since the cache | 333 // That is: ensure that the database has not been modified since the cache |
256 // was last saved. DB file modification date is inadequate. There are no | 334 // was last saved. DB file modification date is inadequate. There are no |
257 // SQLite table checksums automatically stored. | 335 // SQLite table checksums automatically stored. |
258 // FIXME(mrossetti): Move File IO to another thread. | 336 // FIXME(mrossetti): Move File IO to another thread. |
259 base::ThreadRestrictions::ScopedAllowIO allow_io; | 337 base::ThreadRestrictions::ScopedAllowIO allow_io; |
(...skipping 15 matching lines...) Expand all Loading... |
275 return false; | 353 return false; |
276 } | 354 } |
277 | 355 |
278 if (!RestorePrivateData(index_cache)) { | 356 if (!RestorePrivateData(index_cache)) { |
279 ClearPrivateData(); // Back to square one -- must build from scratch. | 357 ClearPrivateData(); // Back to square one -- must build from scratch. |
280 return false; | 358 return false; |
281 } | 359 } |
282 | 360 |
283 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", | 361 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
284 base::TimeTicks::Now() - beginning_time); | 362 base::TimeTicks::Now() - beginning_time); |
285 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); | 363 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| 364 private_data_->history_id_word_map_.size()); |
286 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); | 365 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
287 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); | 366 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
288 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); | 367 private_data_->word_map_.size()); |
| 368 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| 369 private_data_->char_word_map_.size()); |
289 return true; | 370 return true; |
290 } | 371 } |
291 | 372 |
292 bool InMemoryURLIndex::SaveToCacheFile() { | 373 bool InMemoryURLIndex::SaveToCacheFile() { |
293 // TODO(mrossetti): Move File IO to another thread. | 374 // TODO(mrossetti): Move File IO to another thread. |
294 base::ThreadRestrictions::ScopedAllowIO allow_io; | 375 base::ThreadRestrictions::ScopedAllowIO allow_io; |
295 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 376 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
296 InMemoryURLIndexCacheItem index_cache; | 377 InMemoryURLIndexCacheItem index_cache; |
297 SavePrivateData(&index_cache); | 378 SavePrivateData(&index_cache); |
298 std::string data; | 379 std::string data; |
(...skipping 11 matching lines...) Expand all Loading... |
310 int size = data.size(); | 391 int size = data.size(); |
311 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { | 392 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { |
312 LOG(WARNING) << "Failed to write " << file_path.value(); | 393 LOG(WARNING) << "Failed to write " << file_path.value(); |
313 return false; | 394 return false; |
314 } | 395 } |
315 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", | 396 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", |
316 base::TimeTicks::Now() - beginning_time); | 397 base::TimeTicks::Now() - beginning_time); |
317 return true; | 398 return true; |
318 } | 399 } |
319 | 400 |
320 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { | 401 void InMemoryURLIndex::UpdateURL(const URLRow& row) { |
321 // The row may or may not already be in our index. If it is not already | 402 // The row may or may not already be in our index. If it is not already |
322 // indexed and it qualifies then it gets indexed. If it is already | 403 // indexed and it qualifies then it gets indexed. If it is already |
323 // indexed and still qualifies then it gets updated, otherwise it | 404 // indexed and still qualifies then it gets updated, otherwise it |
324 // is deleted from the index. | 405 // is deleted from the index. |
325 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); | 406 HistoryInfoMap::iterator row_pos = |
326 if (row_pos == history_info_map_.end()) { | 407 private_data_->history_info_map_.find(row.id()); |
| 408 if (row_pos == private_data_->history_info_map_.end()) { |
327 // This new row should be indexed if it qualifies. | 409 // This new row should be indexed if it qualifies. |
328 if (RowQualifiesAsSignificant(row, base::Time())) | 410 if (RowQualifiesAsSignificant(row, base::Time())) |
329 IndexRow(row); | 411 IndexRow(row); |
330 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 412 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
331 // This indexed row still qualifies and will be re-indexed. | 413 // This indexed row still qualifies and will be re-indexed. |
332 // The url won't have changed but the title, visit count, etc. | 414 // The url won't have changed but the title, visit count, etc. |
333 // might have changed. | 415 // might have changed. |
334 URLRow& old_row = row_pos->second; | 416 URLRow& old_row = row_pos->second; |
335 old_row.set_visit_count(row.visit_count()); | 417 old_row.set_visit_count(row.visit_count()); |
336 old_row.set_typed_count(row.typed_count()); | 418 old_row.set_typed_count(row.typed_count()); |
337 old_row.set_last_visit(row.last_visit()); | 419 old_row.set_last_visit(row.last_visit()); |
338 // TODO(mrossetti): When we start indexing the title the next line | 420 // While the URL is guaranteed to remain stable, the title may have changed. |
339 // will need attention. | 421 // If so, then we need to update the index with the changed words. |
340 old_row.set_title(row.title()); | 422 if (old_row.title() != row.title()) { |
| 423 // Clear all words associated with this row and re-index both the |
| 424 // URL and title. |
| 425 RemoveRowWordsFromIndex(row); |
| 426 old_row.set_title(row.title()); |
| 427 AddRowWordsToIndex(old_row); |
| 428 } |
341 } else { | 429 } else { |
342 // This indexed row no longer qualifies and will be de-indexed. | 430 // This indexed row no longer qualifies and will be de-indexed by |
343 history_info_map_.erase(row_id); | 431 // clearing all words associated with this row. |
| 432 RemoveRowFromIndex(row); |
344 } | 433 } |
345 // This invalidates the cache. | 434 // This invalidates the cache. |
346 search_term_cache_.clear(); | 435 search_term_cache_.clear(); |
347 // TODO(mrossetti): Record this transaction in the cache. | |
348 } | 436 } |
349 | 437 |
350 void InMemoryURLIndex::DeleteURL(URLID row_id) { | 438 void InMemoryURLIndex::DeleteURL(const URLRow& row) { |
351 // Note that this does not remove any reference to this row from the | 439 RemoveRowFromIndex(row); |
352 // word_id_history_map_. That map will continue to contain (and return) | |
353 // hits against this row until that map is rebuilt, but since the | |
354 // history_info_map_ no longer references the row no erroneous results | |
355 // will propagate to the user. | |
356 history_info_map_.erase(row_id); | |
357 // This invalidates the word cache. | 440 // This invalidates the word cache. |
358 search_term_cache_.clear(); | 441 search_term_cache_.clear(); |
359 // TODO(mrossetti): Record this transaction in the cache. | 442 // TODO(mrossetti): Record this transaction in the cache. |
360 } | 443 } |
361 | 444 |
362 // Searching | 445 // Searching |
363 | 446 |
364 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( | 447 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( |
365 const String16Vector& terms) { | 448 const String16Vector& terms) { |
366 ScoredHistoryMatches scored_items; | 449 ScoredHistoryMatches scored_items; |
| 450 |
| 451 // Do nothing if we have indexed no words (probably because we've not been |
| 452 // initialized yet). |
| 453 if (private_data_->word_list_.empty()) |
| 454 return scored_items; |
| 455 |
367 if (!terms.empty()) { | 456 if (!terms.empty()) { |
368 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep | 457 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
369 // approach. | 458 // approach. |
370 ResetSearchTermCache(); | 459 ResetSearchTermCache(); |
371 | 460 |
372 // Lowercase the terms. | 461 // Lowercase the terms. |
373 // TODO(mrossetti): Another opportunity for a transform algorithm. | 462 // TODO(mrossetti): Another opportunity for a transform algorithm. |
374 String16Vector lower_terms; | 463 String16Vector lower_terms; |
375 for (String16Vector::const_iterator term_iter = terms.begin(); | 464 for (String16Vector::const_iterator term_iter = terms.begin(); |
376 term_iter != terms.end(); ++term_iter) | 465 term_iter != terms.end(); ++term_iter) |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
414 | 503 |
415 return scored_items; | 504 return scored_items; |
416 } | 505 } |
417 | 506 |
418 void InMemoryURLIndex::ResetSearchTermCache() { | 507 void InMemoryURLIndex::ResetSearchTermCache() { |
419 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); | 508 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); |
420 iter != search_term_cache_.end(); ++iter) | 509 iter != search_term_cache_.end(); ++iter) |
421 iter->second.used_ = false; | 510 iter->second.used_ = false; |
422 } | 511 } |
423 | 512 |
424 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( | 513 HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( |
425 const string16& uni_string) { | 514 const string16& uni_string) { |
426 // Break the terms down into individual terms (words), get the candidate | 515 // Break the terms down into individual terms (words), get the candidate |
427 // set for each term, and intersect each to get a final candidate list. | 516 // set for each term, and intersect each to get a final candidate list. |
428 // Note that a single 'term' from the user's perspective might be | 517 // Note that a single 'term' from the user's perspective might be |
429 // a string like "http://www.somewebsite.com" which, from our perspective, | 518 // a string like "http://www.somewebsite.com" which, from our perspective, |
430 // is four words: 'http', 'www', 'somewebsite', and 'com'. | 519 // is four words: 'http', 'www', 'somewebsite', and 'com'. |
431 HistoryIDSet history_id_set; | 520 HistoryIDSet history_id_set; |
432 String16Vector terms = WordVectorFromString16(uni_string, true); | 521 String16Vector terms = String16VectorFromString16(uni_string, true); |
433 // Sort the terms into the longest first as such are likely to narrow down | 522 // Sort the terms into the longest first as such are likely to narrow down |
434 // the results quicker. Also, single character terms are the most expensive | 523 // the results quicker. Also, single character terms are the most expensive |
435 // to process so save them for last. | 524 // to process so save them for last. |
436 std::sort(terms.begin(), terms.end(), LengthGreater); | 525 std::sort(terms.begin(), terms.end(), LengthGreater); |
437 for (String16Vector::iterator iter = terms.begin(); iter != terms.end(); | 526 for (String16Vector::iterator iter = terms.begin(); iter != terms.end(); |
438 ++iter) { | 527 ++iter) { |
439 string16 uni_word = *iter; | 528 string16 uni_word = *iter; |
440 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); | 529 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); |
441 if (term_history_set.empty()) { | 530 if (term_history_set.empty()) { |
442 history_id_set.clear(); | 531 history_id_set.clear(); |
443 break; | 532 break; |
444 } | 533 } |
445 if (iter == terms.begin()) { | 534 if (iter == terms.begin()) { |
446 history_id_set.swap(term_history_set); | 535 history_id_set.swap(term_history_set); |
447 } else { | 536 } else { |
448 HistoryIDSet new_history_id_set; | 537 HistoryIDSet new_history_id_set; |
449 std::set_intersection(history_id_set.begin(), history_id_set.end(), | 538 std::set_intersection(history_id_set.begin(), history_id_set.end(), |
450 term_history_set.begin(), term_history_set.end(), | 539 term_history_set.begin(), term_history_set.end(), |
451 std::inserter(new_history_id_set, | 540 std::inserter(new_history_id_set, |
452 new_history_id_set.begin())); | 541 new_history_id_set.begin())); |
453 history_id_set.swap(new_history_id_set); | 542 history_id_set.swap(new_history_id_set); |
454 } | 543 } |
455 } | 544 } |
456 return history_id_set; | 545 return history_id_set; |
457 } | 546 } |
458 | 547 |
459 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( | 548 HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( |
460 const string16& term) { | 549 const string16& term) { |
461 if (term.empty()) | 550 if (term.empty()) |
462 return HistoryIDSet(); | 551 return HistoryIDSet(); |
463 | 552 |
464 // TODO(mrossetti): Consider optimizing for very common terms such as | 553 // TODO(mrossetti): Consider optimizing for very common terms such as |
465 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently | 554 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently |
466 // occuring words in the user's searches. | 555 // occuring words in the user's searches. |
467 | 556 |
468 size_t term_length = term.length(); | 557 size_t term_length = term.length(); |
469 InMemoryURLIndex::WordIDSet word_id_set; | 558 WordIDSet word_id_set; |
470 if (term_length > 1) { | 559 if (term_length > 1) { |
471 // See if this term or a prefix thereof is present in the cache. | 560 // See if this term or a prefix thereof is present in the cache. |
472 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); | 561 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); |
473 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); | 562 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); |
474 cache_iter != search_term_cache_.end(); ++cache_iter) { | 563 cache_iter != search_term_cache_.end(); ++cache_iter) { |
475 if (StartsWith(term, cache_iter->first, false) && | 564 if (StartsWith(term, cache_iter->first, false) && |
476 (best_prefix == search_term_cache_.end() || | 565 (best_prefix == search_term_cache_.end() || |
477 cache_iter->first.length() > best_prefix->first.length())) | 566 cache_iter->first.length() > best_prefix->first.length())) |
478 best_prefix = cache_iter; | 567 best_prefix = cache_iter; |
479 } | 568 } |
(...skipping 25 matching lines...) Expand all Loading... |
505 | 594 |
506 // Filter for each remaining, unique character in the term. | 595 // Filter for each remaining, unique character in the term. |
507 Char16Set leftover_chars = Char16SetFromString16(leftovers); | 596 Char16Set leftover_chars = Char16SetFromString16(leftovers); |
508 Char16Set unique_chars; | 597 Char16Set unique_chars; |
509 std::set_difference(leftover_chars.begin(), leftover_chars.end(), | 598 std::set_difference(leftover_chars.begin(), leftover_chars.end(), |
510 prefix_chars.begin(), prefix_chars.end(), | 599 prefix_chars.begin(), prefix_chars.end(), |
511 std::inserter(unique_chars, unique_chars.begin())); | 600 std::inserter(unique_chars, unique_chars.begin())); |
512 | 601 |
513 // Reduce the word set with any leftover, unprocessed characters. | 602 // Reduce the word set with any leftover, unprocessed characters. |
514 if (!unique_chars.empty()) { | 603 if (!unique_chars.empty()) { |
515 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); | 604 WordIDSet leftover_set( |
| 605 private_data_->WordIDSetForTermChars(unique_chars)); |
516 // We might come up empty on the leftovers. | 606 // We might come up empty on the leftovers. |
517 if (leftover_set.empty()) { | 607 if (leftover_set.empty()) { |
518 search_term_cache_[term] = SearchTermCacheItem(); | 608 search_term_cache_[term] = SearchTermCacheItem(); |
519 return HistoryIDSet(); | 609 return HistoryIDSet(); |
520 } | 610 } |
521 // Or there may not have been a prefix from which to start. | 611 // Or there may not have been a prefix from which to start. |
522 if (prefix_chars.empty()) { | 612 if (prefix_chars.empty()) { |
523 word_id_set.swap(leftover_set); | 613 word_id_set.swap(leftover_set); |
524 } else { | 614 } else { |
525 WordIDSet new_word_id_set; | 615 WordIDSet new_word_id_set; |
526 std::set_intersection(word_id_set.begin(), word_id_set.end(), | 616 std::set_intersection(word_id_set.begin(), word_id_set.end(), |
527 leftover_set.begin(), leftover_set.end(), | 617 leftover_set.begin(), leftover_set.end(), |
528 std::inserter(new_word_id_set, | 618 std::inserter(new_word_id_set, |
529 new_word_id_set.begin())); | 619 new_word_id_set.begin())); |
530 word_id_set.swap(new_word_id_set); | 620 word_id_set.swap(new_word_id_set); |
531 } | 621 } |
532 } | 622 } |
533 | 623 |
534 // We must filter the word list because the resulting word set surely | 624 // We must filter the word list because the resulting word set surely |
535 // contains words which do not have the search term as a proper subset. | 625 // contains words which do not have the search term as a proper subset. |
536 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); | 626 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); |
537 word_set_iter != word_id_set.end(); ) { | 627 word_set_iter != word_id_set.end(); ) { |
538 if (word_list_[*word_set_iter].find(term) == string16::npos) | 628 if (private_data_->word_list_[*word_set_iter].find(term) == |
| 629 string16::npos) |
539 word_id_set.erase(word_set_iter++); | 630 word_id_set.erase(word_set_iter++); |
540 else | 631 else |
541 ++word_set_iter; | 632 ++word_set_iter; |
542 } | 633 } |
543 } else { | 634 } else { |
544 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); | 635 word_id_set = |
| 636 private_data_->WordIDSetForTermChars(Char16SetFromString16(term)); |
545 } | 637 } |
546 | 638 |
547 // If any words resulted then we can compose a set of history IDs by unioning | 639 // If any words resulted then we can compose a set of history IDs by unioning |
548 // the sets from each word. | 640 // the sets from each word. |
549 HistoryIDSet history_id_set; | 641 HistoryIDSet history_id_set; |
550 if (!word_id_set.empty()) { | 642 if (!word_id_set.empty()) { |
551 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 643 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); |
552 word_id_iter != word_id_set.end(); ++word_id_iter) { | 644 word_id_iter != word_id_set.end(); ++word_id_iter) { |
553 WordID word_id = *word_id_iter; | 645 WordID word_id = *word_id_iter; |
554 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); | 646 WordIDHistoryMap::iterator word_iter = |
555 if (word_iter != word_id_history_map_.end()) { | 647 private_data_->word_id_history_map_.find(word_id); |
| 648 if (word_iter != private_data_->word_id_history_map_.end()) { |
556 HistoryIDSet& word_history_id_set(word_iter->second); | 649 HistoryIDSet& word_history_id_set(word_iter->second); |
557 history_id_set.insert(word_history_id_set.begin(), | 650 history_id_set.insert(word_history_id_set.begin(), |
558 word_history_id_set.end()); | 651 word_history_id_set.end()); |
559 } | 652 } |
560 } | 653 } |
561 } | 654 } |
562 | 655 |
563 // Record a new cache entry for this word if the term is longer than | 656 // Record a new cache entry for this word if the term is longer than |
564 // a single character. | 657 // a single character. |
565 if (term_length > 1) | 658 if (term_length > 1) |
566 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); | 659 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); |
567 | 660 |
568 return history_id_set; | 661 return history_id_set; |
569 } | 662 } |
570 | 663 |
571 // Utility Functions | 664 // Utility Functions |
572 | 665 |
573 // static | |
574 InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( | |
575 const string16& uni_string) { | |
576 const size_t kMaxWordLength = 64; | |
577 String16Vector words = WordVectorFromString16(uni_string, false); | |
578 String16Set word_set; | |
579 for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); | |
580 ++iter) | |
581 word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); | |
582 return word_set; | |
583 } | |
584 | |
585 // static | |
586 InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( | |
587 const string16& uni_string, | |
588 bool break_on_space) { | |
589 base::i18n::BreakIterator iter( | |
590 uni_string, | |
591 break_on_space ? base::i18n::BreakIterator::BREAK_SPACE | |
592 : base::i18n::BreakIterator::BREAK_WORD); | |
593 String16Vector words; | |
594 if (!iter.Init()) | |
595 return words; | |
596 while (iter.Advance()) { | |
597 if (break_on_space || iter.IsWord()) { | |
598 string16 word = iter.GetString(); | |
599 if (break_on_space) | |
600 TrimWhitespace(word, TRIM_ALL, &word); | |
601 if (!word.empty()) | |
602 words.push_back(word); | |
603 } | |
604 } | |
605 return words; | |
606 } | |
607 | |
608 // static | |
609 InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( | |
610 const string16& term) { | |
611 Char16Set characters; | |
612 for (string16::const_iterator iter = term.begin(); iter != term.end(); | |
613 ++iter) | |
614 characters.insert(*iter); | |
615 return characters; | |
616 } | |
617 | |
618 void InMemoryURLIndex::AddWordToIndex(const string16& term, | 666 void InMemoryURLIndex::AddWordToIndex(const string16& term, |
619 HistoryID history_id) { | 667 HistoryID history_id) { |
620 WordMap::iterator word_pos = word_map_.find(term); | 668 WordMap::iterator word_pos = private_data_->word_map_.find(term); |
621 if (word_pos != word_map_.end()) | 669 if (word_pos != private_data_->word_map_.end()) |
622 UpdateWordHistory(word_pos->second, history_id); | 670 UpdateWordHistory(word_pos->second, history_id); |
623 else | 671 else |
624 AddWordHistory(term, history_id); | 672 AddWordHistory(term, history_id); |
625 } | 673 } |
626 | 674 |
627 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { | 675 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { |
628 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); | 676 WordIDHistoryMap::iterator history_pos = |
629 DCHECK(history_pos != word_id_history_map_.end()); | 677 private_data_->word_id_history_map_.find(word_id); |
630 HistoryIDSet& history_id_set(history_pos->second); | 678 DCHECK(history_pos != private_data_->word_id_history_map_.end()); |
631 history_id_set.insert(history_id); | 679 HistoryIDSet& history_id_set(history_pos->second); |
| 680 history_id_set.insert(history_id); |
| 681 private_data_->AddToHistoryIDWordMap(history_id, word_id); |
632 } | 682 } |
633 | 683 |
634 // Add a new word to the word list and the word map, and then create a | 684 // Add a new word to the word list and the word map, and then create a |
635 // new entry in the word/history map. | 685 // new entry in the word/history map. |
636 void InMemoryURLIndex::AddWordHistory(const string16& term, | 686 void InMemoryURLIndex::AddWordHistory(const string16& term, |
637 HistoryID history_id) { | 687 HistoryID history_id) { |
638 word_list_.push_back(term); | 688 URLIndexPrivateData& private_data(*(private_data_.get())); |
639 WordID word_id = word_list_.size() - 1; | 689 WordID word_id = private_data.word_list_.size(); |
640 word_map_[term] = word_id; | 690 if (private_data.available_words_.empty()) { |
| 691 private_data.word_list_.push_back(term); |
| 692 } else { |
| 693 word_id = *(private_data.available_words_.begin()); |
| 694 private_data.word_list_[word_id] = term; |
| 695 private_data.available_words_.erase(word_id); |
| 696 } |
| 697 private_data.word_map_[term] = word_id; |
| 698 |
641 HistoryIDSet history_id_set; | 699 HistoryIDSet history_id_set; |
642 history_id_set.insert(history_id); | 700 history_id_set.insert(history_id); |
643 word_id_history_map_[word_id] = history_id_set; | 701 private_data.word_id_history_map_[word_id] = history_id_set; |
| 702 private_data_->AddToHistoryIDWordMap(history_id, word_id); |
| 703 |
644 // For each character in the newly added word (i.e. a word that is not | 704 // For each character in the newly added word (i.e. a word that is not |
645 // already in the word index), add the word to the character index. | 705 // already in the word index), add the word to the character index. |
646 Char16Set characters = Char16SetFromString16(term); | 706 Char16Set characters = Char16SetFromString16(term); |
647 for (Char16Set::iterator uni_char_iter = characters.begin(); | 707 for (Char16Set::iterator uni_char_iter = characters.begin(); |
648 uni_char_iter != characters.end(); ++uni_char_iter) { | 708 uni_char_iter != characters.end(); ++uni_char_iter) { |
649 char16 uni_char = *uni_char_iter; | 709 char16 uni_char = *uni_char_iter; |
650 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); | 710 CharWordIDMap::iterator char_iter = |
651 if (char_iter != char_word_map_.end()) { | 711 private_data.char_word_map_.find(uni_char); |
| 712 if (char_iter != private_data.char_word_map_.end()) { |
652 // Update existing entry in the char/word index. | 713 // Update existing entry in the char/word index. |
653 WordIDSet& word_id_set(char_iter->second); | 714 WordIDSet& word_id_set(char_iter->second); |
654 word_id_set.insert(word_id); | 715 word_id_set.insert(word_id); |
655 } else { | 716 } else { |
656 // Create a new entry in the char/word index. | 717 // Create a new entry in the char/word index. |
657 WordIDSet word_id_set; | 718 WordIDSet word_id_set; |
658 word_id_set.insert(word_id); | 719 word_id_set.insert(word_id); |
659 char_word_map_[uni_char] = word_id_set; | 720 private_data.char_word_map_[uni_char] = word_id_set; |
660 } | 721 } |
661 } | 722 } |
662 } | 723 } |
663 | 724 |
664 InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( | |
665 const Char16Set& term_chars) { | |
666 WordIDSet word_id_set; | |
667 for (Char16Set::const_iterator c_iter = term_chars.begin(); | |
668 c_iter != term_chars.end(); ++c_iter) { | |
669 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); | |
670 if (char_iter == char_word_map_.end()) { | |
671 // A character was not found so there are no matching results: bail. | |
672 word_id_set.clear(); | |
673 break; | |
674 } | |
675 WordIDSet& char_word_id_set(char_iter->second); | |
676 // It is possible for there to no longer be any words associated with | |
677 // a particular character. Give up in that case. | |
678 if (char_word_id_set.empty()) { | |
679 word_id_set.clear(); | |
680 break; | |
681 } | |
682 | |
683 if (c_iter == term_chars.begin()) { | |
684 // First character results becomes base set of results. | |
685 word_id_set = char_word_id_set; | |
686 } else { | |
687 // Subsequent character results get intersected in. | |
688 WordIDSet new_word_id_set; | |
689 std::set_intersection(word_id_set.begin(), word_id_set.end(), | |
690 char_word_id_set.begin(), char_word_id_set.end(), | |
691 std::inserter(new_word_id_set, | |
692 new_word_id_set.begin())); | |
693 word_id_set.swap(new_word_id_set); | |
694 } | |
695 } | |
696 return word_id_set; | |
697 } | |
698 | |
699 // static | 725 // static |
700 TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, | 726 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. |
701 const string16& string, | |
702 int term_num) { | |
703 const size_t kMaxCompareLength = 2048; | |
704 const string16& short_string = (string.length() > kMaxCompareLength) ? | |
705 string.substr(0, kMaxCompareLength) : string; | |
706 TermMatches matches; | |
707 for (size_t location = short_string.find(term); location != string16::npos; | |
708 location = short_string.find(term, location + 1)) { | |
709 matches.push_back(TermMatch(term_num, location, term.length())); | |
710 } | |
711 return matches; | |
712 } | |
713 | |
714 // static | |
715 TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { | |
716 if (matches.empty()) | |
717 return matches; | |
718 TermMatches sorted_matches = matches; | |
719 std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); | |
720 TermMatches clean_matches; | |
721 TermMatch last_match = sorted_matches[0]; | |
722 clean_matches.push_back(last_match); | |
723 for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; | |
724 iter != sorted_matches.end(); ++iter) { | |
725 if (iter->offset >= last_match.offset + last_match.length) { | |
726 last_match = *iter; | |
727 clean_matches.push_back(last_match); | |
728 } | |
729 } | |
730 return clean_matches; | |
731 } | |
732 | |
733 // static | |
734 std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( | |
735 const TermMatches& matches) { | |
736 std::vector<size_t> offsets; | |
737 for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) | |
738 offsets.push_back(i->offset); | |
739 return offsets; | |
740 } | |
741 | |
742 // static | |
743 TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( | |
744 const TermMatches& matches, | |
745 const std::vector<size_t>& offsets) { | |
746 DCHECK_EQ(matches.size(), offsets.size()); | |
747 TermMatches new_matches; | |
748 std::vector<size_t>::const_iterator offset_iter = offsets.begin(); | |
749 for (TermMatches::const_iterator term_iter = matches.begin(); | |
750 term_iter != matches.end(); ++term_iter, ++offset_iter) { | |
751 if (*offset_iter != string16::npos) { | |
752 TermMatch new_match(*term_iter); | |
753 new_match.offset = *offset_iter; | |
754 new_matches.push_back(new_match); | |
755 } | |
756 } | |
757 return new_matches; | |
758 } | |
759 | |
760 // static | |
761 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( | 727 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( |
762 const URLRow& row, | 728 const URLRow& row, |
763 const String16Vector& terms) { | 729 const String16Vector& terms) { |
764 ScoredHistoryMatch match(row); | 730 ScoredHistoryMatch match(row); |
765 GURL gurl = row.url(); | 731 GURL gurl = row.url(); |
766 if (!gurl.is_valid()) | 732 if (!gurl.is_valid()) |
767 return match; | 733 return match; |
768 | 734 |
769 // Figure out where each search term appears in the URL and/or page title | 735 // Figure out where each search term appears in the URL and/or page title |
770 // so that we can score as well as provide autocomplete highlighting. | 736 // so that we can score as well as provide autocomplete highlighting. |
771 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); | 737 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); |
772 string16 title = base::i18n::ToLower(row.title()); | 738 string16 title = base::i18n::ToLower(row.title()); |
773 int term_num = 0; | 739 int term_num = 0; |
774 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); | 740 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); |
775 ++iter, ++term_num) { | 741 ++iter, ++term_num) { |
776 string16 term = *iter; | 742 string16 term = *iter; |
777 TermMatches url_term_matches = MatchTermInString(term, url, term_num); | 743 TermMatches url_term_matches = MatchTermInString(term, url, term_num); |
778 TermMatches title_term_matches = MatchTermInString(term, title, term_num); | 744 TermMatches title_term_matches = MatchTermInString(term, title, term_num); |
779 if (url_term_matches.empty() && title_term_matches.empty()) | 745 if (url_term_matches.empty() && title_term_matches.empty()) |
780 return match; // A term was not found in either URL or title - reject. | 746 return match; // A term was not found in either URL or title - reject. |
781 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), | 747 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), |
782 url_term_matches.end()); | 748 url_term_matches.end()); |
783 match.title_matches.insert(match.title_matches.end(), | 749 match.title_matches.insert(match.title_matches.end(), |
784 title_term_matches.begin(), | 750 title_term_matches.begin(), |
785 title_term_matches.end()); | 751 title_term_matches.end()); |
786 } | 752 } |
787 | 753 |
788 // Sort matches by offset and eliminate any which overlap. | 754 // Sort matches by offset and eliminate any which overlap. |
789 match.url_matches = SortAndDeoverlap(match.url_matches); | 755 match.url_matches = SortAndDeoverlapMatches(match.url_matches); |
790 match.title_matches = SortAndDeoverlap(match.title_matches); | 756 match.title_matches = SortAndDeoverlapMatches(match.title_matches); |
791 | 757 |
792 // We should not (currently) inline autocomplete a result unless both of the | 758 // We should not (currently) inline autocomplete a result unless both of the |
793 // following are true: | 759 // following are true: |
794 // * There is exactly one substring matches in the URL, and | 760 // * There is exactly one substring matches in the URL, and |
795 // * The one URL match starts at the beginning of the URL. | 761 // * The one URL match starts at the beginning of the URL. |
796 match.can_inline = | 762 match.can_inline = |
797 match.url_matches.size() == 1 && match.url_matches[0].offset == 0; | 763 match.url_matches.size() == 1 && match.url_matches[0].offset == 0; |
798 | 764 |
799 // Get partial scores based on term matching. Note that the score for | 765 // Get partial scores based on term matching. Note that the score for |
800 // each of the URL and title are adjusted by the fraction of the | 766 // each of the URL and title are adjusted by the fraction of the |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
832 int factor[] = {term_score, days_ago_value, visit_count_value, | 798 int factor[] = {term_score, days_ago_value, visit_count_value, |
833 visit_count_value, typed_count_value, typed_count_value, | 799 visit_count_value, typed_count_value, typed_count_value, |
834 typed_count_value}; | 800 typed_count_value}; |
835 const int kInsignificantFactors = 2; | 801 const int kInsignificantFactors = 2; |
836 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors; | 802 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors; |
837 std::partial_sort(factor, factor + kSignificantFactors, | 803 std::partial_sort(factor, factor + kSignificantFactors, |
838 factor + arraysize(factor), std::greater<int>()); | 804 factor + arraysize(factor), std::greater<int>()); |
839 for (int i = 0; i < kSignificantFactors; ++i) | 805 for (int i = 0; i < kSignificantFactors; ++i) |
840 match.raw_score += factor[i]; | 806 match.raw_score += factor[i]; |
841 match.raw_score /= kSignificantFactors; | 807 match.raw_score /= kSignificantFactors; |
842 | |
843 return match; | 808 return match; |
844 } | 809 } |
845 | 810 |
846 int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches, | 811 int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches, |
847 size_t max_length) { | 812 size_t max_length) { |
848 // TODO(mrossetti): This is good enough for now but must be fine-tuned. | 813 // TODO(mrossetti): This is good enough for now but must be fine-tuned. |
849 if (matches.empty()) | 814 if (matches.empty()) |
850 return 0; | 815 return 0; |
851 | 816 |
852 // Score component for whether the input terms (if more than one) were found | 817 // Score component for whether the input terms (if more than one) were found |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
894 | 859 |
895 // Scale the sum of the three components above into a single score component | 860 // Scale the sum of the three components above into a single score component |
896 // on the same scale as that used in ScoredMatchForURL(). | 861 // on the same scale as that used in ScoredMatchForURL(). |
897 return ScoreForValue(raw_score, kTermScoreLevel); | 862 return ScoreForValue(raw_score, kTermScoreLevel); |
898 } | 863 } |
899 | 864 |
900 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( | 865 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( |
901 const InMemoryURLIndex& index, | 866 const InMemoryURLIndex& index, |
902 const String16Vector& lower_terms) | 867 const String16Vector& lower_terms) |
903 : index_(index), | 868 : index_(index), |
904 lower_terms_(lower_terms) { | 869 lower_terms_(lower_terms) {} |
905 } | |
906 | 870 |
907 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} | 871 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} |
908 | 872 |
909 void InMemoryURLIndex::AddHistoryMatch::operator()( | 873 void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) { |
910 const InMemoryURLIndex::HistoryID history_id) { | |
911 HistoryInfoMap::const_iterator hist_pos = | 874 HistoryInfoMap::const_iterator hist_pos = |
912 index_.history_info_map_.find(history_id); | 875 index_.private_data_->history_info_map_.find(history_id); |
913 // Note that a history_id may be present in the word_id_history_map_ yet not | 876 // Note that a history_id may be present in the word_id_history_map_ yet not |
914 // be found in the history_info_map_. This occurs when an item has been | 877 // be found in the history_info_map_. This occurs when an item has been |
915 // deleted by the user or the item no longer qualifies as a quick result. | 878 // deleted by the user or the item no longer qualifies as a quick result. |
916 if (hist_pos != index_.history_info_map_.end()) { | 879 if (hist_pos != index_.private_data_->history_info_map_.end()) { |
917 const URLRow& hist_item = hist_pos->second; | 880 const URLRow& hist_item = hist_pos->second; |
918 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); | 881 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); |
919 if (match.raw_score > 0) | 882 if (match.raw_score > 0) |
920 scored_matches_.push_back(match); | 883 scored_matches_.push_back(match); |
921 } | 884 } |
922 } | 885 } |
923 | 886 |
924 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { | 887 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { |
925 if (history_dir_.empty()) | 888 if (history_dir_.empty()) |
926 return false; | 889 return false; |
927 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); | 890 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); |
928 return true; | 891 return true; |
929 } | 892 } |
930 | 893 |
931 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { | 894 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { |
932 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); | 895 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); |
933 } | 896 } |
934 | 897 |
935 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { | 898 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { |
936 DCHECK(cache); | 899 DCHECK(cache); |
937 cache->set_timestamp(base::Time::Now().ToInternalValue()); | 900 cache->set_timestamp(base::Time::Now().ToInternalValue()); |
938 cache->set_history_item_count(history_item_count_); | 901 // history_item_count_ is no longer used but rather than change the protobuf |
| 902 // definition use a placeholder. This will go away with the switch to SQLite. |
| 903 cache->set_history_item_count(0); |
939 SaveWordList(cache); | 904 SaveWordList(cache); |
940 SaveWordMap(cache); | 905 SaveWordMap(cache); |
941 SaveCharWordMap(cache); | 906 SaveCharWordMap(cache); |
942 SaveWordIDHistoryMap(cache); | 907 SaveWordIDHistoryMap(cache); |
943 SaveHistoryInfoMap(cache); | 908 SaveHistoryInfoMap(cache); |
944 } | 909 } |
945 | 910 |
946 bool InMemoryURLIndex::RestorePrivateData( | 911 bool InMemoryURLIndex::RestorePrivateData( |
947 const InMemoryURLIndexCacheItem& cache) { | 912 const InMemoryURLIndexCacheItem& cache) { |
948 last_saved_ = base::Time::FromInternalValue(cache.timestamp()); | 913 last_saved_ = base::Time::FromInternalValue(cache.timestamp()); |
949 history_item_count_ = cache.history_item_count(); | 914 return RestoreWordList(cache) && RestoreWordMap(cache) && |
950 return (history_item_count_ == 0) || (RestoreWordList(cache) && | 915 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
951 RestoreWordMap(cache) && RestoreCharWordMap(cache) && | 916 RestoreHistoryInfoMap(cache); |
952 RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); | |
953 } | 917 } |
954 | 918 |
955 | |
956 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { | 919 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
957 if (word_list_.empty()) | 920 if (private_data_->word_list_.empty()) |
958 return; | 921 return; |
959 WordListItem* list_item = cache->mutable_word_list(); | 922 WordListItem* list_item = cache->mutable_word_list(); |
960 list_item->set_word_count(word_list_.size()); | 923 list_item->set_word_count(private_data_->word_list_.size()); |
961 for (String16Vector::const_iterator iter = word_list_.begin(); | 924 for (String16Vector::const_iterator iter = private_data_->word_list_.begin(); |
962 iter != word_list_.end(); ++iter) | 925 iter != private_data_->word_list_.end(); ++iter) |
963 list_item->add_word(UTF16ToUTF8(*iter)); | 926 list_item->add_word(UTF16ToUTF8(*iter)); |
964 } | 927 } |
965 | 928 |
966 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { | 929 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { |
967 if (!cache.has_word_list()) | 930 if (!cache.has_word_list()) |
968 return false; | 931 return false; |
969 const WordListItem& list_item(cache.word_list()); | 932 const WordListItem& list_item(cache.word_list()); |
970 uint32 expected_item_count = list_item.word_count(); | 933 uint32 expected_item_count = list_item.word_count(); |
971 uint32 actual_item_count = list_item.word_size(); | 934 uint32 actual_item_count = list_item.word_size(); |
972 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 935 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
973 return false; | 936 return false; |
974 const RepeatedPtrField<std::string>& words(list_item.word()); | 937 const RepeatedPtrField<std::string>& words(list_item.word()); |
975 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); | 938 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); |
976 iter != words.end(); ++iter) | 939 iter != words.end(); ++iter) |
977 word_list_.push_back(UTF8ToUTF16(*iter)); | 940 private_data_->word_list_.push_back(UTF8ToUTF16(*iter)); |
978 return true; | 941 return true; |
979 } | 942 } |
980 | 943 |
981 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { | 944 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { |
982 if (word_map_.empty()) | 945 if (private_data_->word_map_.empty()) |
983 return; | 946 return; |
984 WordMapItem* map_item = cache->mutable_word_map(); | 947 WordMapItem* map_item = cache->mutable_word_map(); |
985 map_item->set_item_count(word_map_.size()); | 948 map_item->set_item_count(private_data_->word_map_.size()); |
986 for (WordMap::const_iterator iter = word_map_.begin(); | 949 for (WordMap::const_iterator iter = private_data_->word_map_.begin(); |
987 iter != word_map_.end(); ++iter) { | 950 iter != private_data_->word_map_.end(); ++iter) { |
988 WordMapEntry* map_entry = map_item->add_word_map_entry(); | 951 WordMapEntry* map_entry = map_item->add_word_map_entry(); |
989 map_entry->set_word(UTF16ToUTF8(iter->first)); | 952 map_entry->set_word(UTF16ToUTF8(iter->first)); |
990 map_entry->set_word_id(iter->second); | 953 map_entry->set_word_id(iter->second); |
991 } | 954 } |
992 } | 955 } |
993 | 956 |
994 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { | 957 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { |
995 if (!cache.has_word_map()) | 958 if (!cache.has_word_map()) |
996 return false; | 959 return false; |
997 const WordMapItem& list_item(cache.word_map()); | 960 const WordMapItem& list_item(cache.word_map()); |
998 uint32 expected_item_count = list_item.item_count(); | 961 uint32 expected_item_count = list_item.item_count(); |
999 uint32 actual_item_count = list_item.word_map_entry_size(); | 962 uint32 actual_item_count = list_item.word_map_entry_size(); |
1000 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 963 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1001 return false; | 964 return false; |
1002 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); | 965 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); |
1003 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); | 966 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); |
1004 iter != entries.end(); ++iter) | 967 iter != entries.end(); ++iter) |
1005 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); | 968 private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); |
1006 return true; | 969 return true; |
1007 } | 970 } |
1008 | 971 |
1009 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { | 972 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { |
1010 if (char_word_map_.empty()) | 973 if (private_data_->char_word_map_.empty()) |
1011 return; | 974 return; |
1012 CharWordMapItem* map_item = cache->mutable_char_word_map(); | 975 CharWordMapItem* map_item = cache->mutable_char_word_map(); |
1013 map_item->set_item_count(char_word_map_.size()); | 976 map_item->set_item_count(private_data_->char_word_map_.size()); |
1014 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); | 977 for (CharWordIDMap::const_iterator iter = |
1015 iter != char_word_map_.end(); ++iter) { | 978 private_data_->char_word_map_.begin(); |
| 979 iter != private_data_->char_word_map_.end(); ++iter) { |
1016 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); | 980 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); |
1017 map_entry->set_char_16(iter->first); | 981 map_entry->set_char_16(iter->first); |
1018 const WordIDSet& word_id_set(iter->second); | 982 const WordIDSet& word_id_set(iter->second); |
1019 map_entry->set_item_count(word_id_set.size()); | 983 map_entry->set_item_count(word_id_set.size()); |
1020 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); | 984 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); |
1021 set_iter != word_id_set.end(); ++set_iter) | 985 set_iter != word_id_set.end(); ++set_iter) |
1022 map_entry->add_word_id(*set_iter); | 986 map_entry->add_word_id(*set_iter); |
1023 } | 987 } |
1024 } | 988 } |
1025 | 989 |
(...skipping 13 matching lines...) Expand all Loading... |
1039 expected_item_count = iter->item_count(); | 1003 expected_item_count = iter->item_count(); |
1040 actual_item_count = iter->word_id_size(); | 1004 actual_item_count = iter->word_id_size(); |
1041 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1005 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1042 return false; | 1006 return false; |
1043 char16 uni_char = static_cast<char16>(iter->char_16()); | 1007 char16 uni_char = static_cast<char16>(iter->char_16()); |
1044 WordIDSet word_id_set; | 1008 WordIDSet word_id_set; |
1045 const RepeatedField<int32>& word_ids(iter->word_id()); | 1009 const RepeatedField<int32>& word_ids(iter->word_id()); |
1046 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); | 1010 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); |
1047 jiter != word_ids.end(); ++jiter) | 1011 jiter != word_ids.end(); ++jiter) |
1048 word_id_set.insert(*jiter); | 1012 word_id_set.insert(*jiter); |
1049 char_word_map_[uni_char] = word_id_set; | 1013 private_data_->char_word_map_[uni_char] = word_id_set; |
1050 } | 1014 } |
1051 return true; | 1015 return true; |
1052 } | 1016 } |
1053 | 1017 |
1054 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) | 1018 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) |
1055 const { | 1019 const { |
1056 if (word_id_history_map_.empty()) | 1020 if (private_data_->word_id_history_map_.empty()) |
1057 return; | 1021 return; |
1058 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); | 1022 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); |
1059 map_item->set_item_count(word_id_history_map_.size()); | 1023 map_item->set_item_count(private_data_->word_id_history_map_.size()); |
1060 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); | 1024 for (WordIDHistoryMap::const_iterator iter = |
1061 iter != word_id_history_map_.end(); ++iter) { | 1025 private_data_->word_id_history_map_.begin(); |
| 1026 iter != private_data_->word_id_history_map_.end(); ++iter) { |
1062 WordIDHistoryMapEntry* map_entry = | 1027 WordIDHistoryMapEntry* map_entry = |
1063 map_item->add_word_id_history_map_entry(); | 1028 map_item->add_word_id_history_map_entry(); |
1064 map_entry->set_word_id(iter->first); | 1029 map_entry->set_word_id(iter->first); |
1065 const HistoryIDSet& history_id_set(iter->second); | 1030 const HistoryIDSet& history_id_set(iter->second); |
1066 map_entry->set_item_count(history_id_set.size()); | 1031 map_entry->set_item_count(history_id_set.size()); |
1067 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); | 1032 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); |
1068 set_iter != history_id_set.end(); ++set_iter) | 1033 set_iter != history_id_set.end(); ++set_iter) |
1069 map_entry->add_history_id(*set_iter); | 1034 map_entry->add_history_id(*set_iter); |
1070 } | 1035 } |
1071 } | 1036 } |
(...skipping 12 matching lines...) Expand all Loading... |
1084 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = | 1049 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = |
1085 entries.begin(); iter != entries.end(); ++iter) { | 1050 entries.begin(); iter != entries.end(); ++iter) { |
1086 expected_item_count = iter->item_count(); | 1051 expected_item_count = iter->item_count(); |
1087 actual_item_count = iter->history_id_size(); | 1052 actual_item_count = iter->history_id_size(); |
1088 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1053 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1089 return false; | 1054 return false; |
1090 WordID word_id = iter->word_id(); | 1055 WordID word_id = iter->word_id(); |
1091 HistoryIDSet history_id_set; | 1056 HistoryIDSet history_id_set; |
1092 const RepeatedField<int64>& history_ids(iter->history_id()); | 1057 const RepeatedField<int64>& history_ids(iter->history_id()); |
1093 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); | 1058 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); |
1094 jiter != history_ids.end(); ++jiter) | 1059 jiter != history_ids.end(); ++jiter) { |
1095 history_id_set.insert(*jiter); | 1060 history_id_set.insert(*jiter); |
1096 word_id_history_map_[word_id] = history_id_set; | 1061 private_data_->AddToHistoryIDWordMap(*jiter, word_id); |
| 1062 } |
| 1063 private_data_->word_id_history_map_[word_id] = history_id_set; |
1097 } | 1064 } |
1098 return true; | 1065 return true; |
1099 } | 1066 } |
1100 | 1067 |
1101 void InMemoryURLIndex::SaveHistoryInfoMap( | 1068 void InMemoryURLIndex::SaveHistoryInfoMap( |
1102 InMemoryURLIndexCacheItem* cache) const { | 1069 InMemoryURLIndexCacheItem* cache) const { |
1103 if (history_info_map_.empty()) | 1070 if (private_data_->history_info_map_.empty()) |
1104 return; | 1071 return; |
1105 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 1072 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
1106 map_item->set_item_count(history_info_map_.size()); | 1073 map_item->set_item_count(private_data_->history_info_map_.size()); |
1107 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1074 for (HistoryInfoMap::const_iterator iter = |
1108 iter != history_info_map_.end(); ++iter) { | 1075 private_data_->history_info_map_.begin(); |
| 1076 iter != private_data_->history_info_map_.end(); ++iter) { |
1109 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 1077 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
1110 map_entry->set_history_id(iter->first); | 1078 map_entry->set_history_id(iter->first); |
1111 const URLRow& url_row(iter->second); | 1079 const URLRow& url_row(iter->second); |
1112 // Note: We only save information that contributes to the index so there | 1080 // Note: We only save information that contributes to the index so there |
1113 // is no need to save search_term_cache_ (not persistent), | 1081 // is no need to save search_term_cache_ (not persistent), |
1114 // languages_, etc. | 1082 // languages_, etc. |
1115 map_entry->set_visit_count(url_row.visit_count()); | 1083 map_entry->set_visit_count(url_row.visit_count()); |
1116 map_entry->set_typed_count(url_row.typed_count()); | 1084 map_entry->set_typed_count(url_row.typed_count()); |
1117 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1085 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1118 map_entry->set_url(url_row.url().spec()); | 1086 map_entry->set_url(url_row.url().spec()); |
(...skipping 17 matching lines...) Expand all Loading... |
1136 HistoryID history_id = iter->history_id(); | 1104 HistoryID history_id = iter->history_id(); |
1137 GURL url(iter->url()); | 1105 GURL url(iter->url()); |
1138 URLRow url_row(url, history_id); | 1106 URLRow url_row(url, history_id); |
1139 url_row.set_visit_count(iter->visit_count()); | 1107 url_row.set_visit_count(iter->visit_count()); |
1140 url_row.set_typed_count(iter->typed_count()); | 1108 url_row.set_typed_count(iter->typed_count()); |
1141 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1109 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); |
1142 if (iter->has_title()) { | 1110 if (iter->has_title()) { |
1143 string16 title(UTF8ToUTF16(iter->title())); | 1111 string16 title(UTF8ToUTF16(iter->title())); |
1144 url_row.set_title(title); | 1112 url_row.set_title(title); |
1145 } | 1113 } |
1146 history_info_map_[history_id] = url_row; | 1114 private_data_->history_info_map_[history_id] = url_row; |
1147 } | 1115 } |
1148 return true; | 1116 return true; |
1149 } | 1117 } |
1150 | 1118 |
1151 } // namespace history | 1119 } // namespace history |
OLD | NEW |