Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/in_memory_url_index.h" | 5 #include "chrome/browser/history/in_memory_url_index.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <limits> | 10 #include <limits> |
| 11 #include <numeric> | 11 #include <numeric> |
| 12 | 12 |
| 13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
| 14 #include "base/i18n/break_iterator.h" | |
| 15 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
| 16 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
| 17 #include "base/string_util.h" | |
| 18 #include "base/threading/thread_restrictions.h" | 16 #include "base/threading/thread_restrictions.h" |
| 19 #include "base/time.h" | 17 #include "base/time.h" |
| 20 #include "base/utf_string_conversions.h" | 18 #include "base/utf_string_conversions.h" |
| 21 #include "chrome/browser/autocomplete/autocomplete.h" | 19 #include "chrome/browser/autocomplete/autocomplete.h" |
| 22 #include "chrome/browser/autocomplete/history_provider_util.h" | 20 #include "chrome/browser/autocomplete/history_provider_util.h" |
| 21 #include "chrome/browser/history/history_notifications.h" | |
| 23 #include "chrome/browser/history/url_database.h" | 22 #include "chrome/browser/history/url_database.h" |
| 24 #include "chrome/browser/profiles/profile.h" | 23 #include "chrome/browser/profiles/profile.h" |
| 24 #include "chrome/common/chrome_notification_types.h" | |
| 25 #include "chrome/common/url_constants.h" | 25 #include "chrome/common/url_constants.h" |
| 26 #include "content/common/notification_details.h" | |
| 27 #include "content/common/notification_source.h" | |
| 26 #include "googleurl/src/url_parse.h" | 28 #include "googleurl/src/url_parse.h" |
| 27 #include "googleurl/src/url_util.h" | 29 #include "googleurl/src/url_util.h" |
| 28 #include "net/base/escape.h" | 30 #include "net/base/escape.h" |
| 29 #include "net/base/net_util.h" | 31 #include "net/base/net_util.h" |
| 30 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 32 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
| 31 #include "ui/base/l10n/l10n_util.h" | 33 #include "ui/base/l10n/l10n_util.h" |
| 32 | 34 |
| 33 using google::protobuf::RepeatedField; | 35 using google::protobuf::RepeatedField; |
| 34 using google::protobuf::RepeatedPtrField; | 36 using google::protobuf::RepeatedPtrField; |
| 35 using in_memory_url_index::InMemoryURLIndexCacheItem; | 37 using in_memory_url_index::InMemoryURLIndexCacheItem; |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 52 HistoryInfoMapEntry; | 54 HistoryInfoMapEntry; |
| 53 | 55 |
| 54 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; | 56 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; |
| 55 | 57 |
| 56 // Score ranges used to get a 'base' score for each of the scoring factors | 58 // Score ranges used to get a 'base' score for each of the scoring factors |
| 57 // (such as recency of last visit, times visited, times the URL was typed, | 59 // (such as recency of last visit, times visited, times the URL was typed, |
| 58 // and the quality of the string match). There is a matching value range for | 60 // and the quality of the string match). There is a matching value range for |
| 59 // each of these scores for each factor. | 61 // each of these scores for each factor. |
| 60 const int kScoreRank[] = { 1425, 1200, 900, 400 }; | 62 const int kScoreRank[] = { 1425, 1200, 900, 400 }; |
| 61 | 63 |
| 62 ScoredHistoryMatch::ScoredHistoryMatch() | |
| 63 : raw_score(0), | |
| 64 can_inline(false) {} | |
| 65 | |
| 66 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) | |
| 67 : HistoryMatch(url_info, 0, false, false), | |
| 68 raw_score(0), | |
| 69 can_inline(false) {} | |
| 70 | |
| 71 ScoredHistoryMatch::~ScoredHistoryMatch() {} | |
| 72 | |
| 73 // Comparison function for sorting ScoredMatches by their scores. | |
| 74 bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, | |
| 75 const ScoredHistoryMatch& m2) { | |
| 76 return m1.raw_score >= m2.raw_score; | |
| 77 } | |
| 78 | |
| 79 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( | 64 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( |
| 80 const WordIDSet& word_id_set, | 65 const WordIDSet& word_id_set, |
| 81 const HistoryIDSet& history_id_set) | 66 const HistoryIDSet& history_id_set) |
| 82 : word_id_set_(word_id_set), | 67 : word_id_set_(word_id_set), |
| 83 history_id_set_(history_id_set), | 68 history_id_set_(history_id_set), |
| 84 used_(true) {} | 69 used_(true) {} |
| 85 | 70 |
| 86 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() | 71 InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem() |
| 87 : used_(true) {} | 72 : used_(true) {} |
| 88 | 73 |
| 89 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} | 74 InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} |
| 90 | 75 |
| 91 // Comparison function for sorting TermMatches by their offsets. | |
| 92 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { | |
| 93 return m1.offset < m2.offset; | |
| 94 } | |
| 95 | |
| 96 // Comparison function for sorting search terms by descending length. | 76 // Comparison function for sorting search terms by descending length. |
| 97 bool LengthGreater(const string16& string_a, const string16& string_b) { | 77 bool LengthGreater(const string16& string_a, const string16& string_b) { |
| 98 return string_a.length() > string_b.length(); | 78 return string_a.length() > string_b.length(); |
| 99 } | 79 } |
| 100 | 80 |
| 101 // std::accumulate helper function to add up TermMatches' lengths. | 81 // std::accumulate helper function to add up TermMatches' lengths. |
| 102 int AccumulateMatchLength(int total, const TermMatch& match) { | 82 int AccumulateMatchLength(int total, const TermMatch& match) { |
| 103 return total + match.length; | 83 return total + match.length; |
| 104 } | 84 } |
| 105 | 85 |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 128 return 0; | 108 return 0; |
| 129 int score = kScoreRank[i]; | 109 int score = kScoreRank[i]; |
| 130 if (i > 0) { | 110 if (i > 0) { |
| 131 score += (value - value_ranks[i]) * | 111 score += (value - value_ranks[i]) * |
| 132 (kScoreRank[i - 1] - kScoreRank[i]) / | 112 (kScoreRank[i - 1] - kScoreRank[i]) / |
| 133 (value_ranks[i - 1] - value_ranks[i]); | 113 (value_ranks[i - 1] - value_ranks[i]); |
| 134 } | 114 } |
| 135 return score; | 115 return score; |
| 136 } | 116 } |
| 137 | 117 |
| 138 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) | 118 InMemoryURLIndex::InMemoryURLIndex(Profile* profile, |
| 119 const FilePath& history_dir) | |
| 139 : history_dir_(history_dir), | 120 : history_dir_(history_dir), |
| 140 history_item_count_(0) { | 121 private_data_(new URLIndexPrivateData) { |
| 141 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 122 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
| 123 if (profile) { | |
| 124 Source<Profile> source(profile); | |
| 125 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source); | |
| 126 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED, | |
| 127 source); | |
| 128 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source); | |
| 129 } | |
| 142 } | 130 } |
| 143 | 131 |
| 144 // Called only by unit tests. | 132 // Called only by unit tests. |
| 145 InMemoryURLIndex::InMemoryURLIndex() | 133 InMemoryURLIndex::InMemoryURLIndex() |
| 146 : history_item_count_(0) { | 134 : private_data_(new URLIndexPrivateData) { |
| 147 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 135 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
| 148 } | 136 } |
| 149 | 137 |
| 150 InMemoryURLIndex::~InMemoryURLIndex() {} | 138 InMemoryURLIndex::~InMemoryURLIndex() {} |
| 151 | 139 |
| 152 // static | 140 // static |
| 153 void InMemoryURLIndex::InitializeSchemeWhitelist( | 141 void InMemoryURLIndex::InitializeSchemeWhitelist( |
| 154 std::set<std::string>* whitelist) { | 142 std::set<std::string>* whitelist) { |
| 155 DCHECK(whitelist); | 143 DCHECK(whitelist); |
| 156 whitelist->insert(std::string(chrome::kAboutScheme)); | 144 whitelist->insert(std::string(chrome::kAboutScheme)); |
| 157 whitelist->insert(std::string(chrome::kChromeUIScheme)); | 145 whitelist->insert(std::string(chrome::kChromeUIScheme)); |
| 158 whitelist->insert(std::string(chrome::kFileScheme)); | 146 whitelist->insert(std::string(chrome::kFileScheme)); |
| 159 whitelist->insert(std::string(chrome::kFtpScheme)); | 147 whitelist->insert(std::string(chrome::kFtpScheme)); |
| 160 whitelist->insert(std::string(chrome::kHttpScheme)); | 148 whitelist->insert(std::string(chrome::kHttpScheme)); |
| 161 whitelist->insert(std::string(chrome::kHttpsScheme)); | 149 whitelist->insert(std::string(chrome::kHttpsScheme)); |
| 162 whitelist->insert(std::string(chrome::kMailToScheme)); | 150 whitelist->insert(std::string(chrome::kMailToScheme)); |
| 163 } | 151 } |
| 164 | 152 |
| 165 // Indexing | 153 // Indexing |
| 166 | 154 |
| 167 bool InMemoryURLIndex::Init(history::URLDatabase* history_db, | 155 bool InMemoryURLIndex::Init(URLDatabase* history_db, |
| 168 const std::string& languages) { | 156 const std::string& languages) { |
| 169 // TODO(mrossetti): Register for profile/language change notifications. | 157 // TODO(mrossetti): Register for profile/language change notifications. |
| 170 languages_ = languages; | 158 languages_ = languages; |
| 171 return ReloadFromHistory(history_db, false); | 159 return ReloadFromHistory(history_db, false); |
| 172 } | 160 } |
| 173 | 161 |
| 174 void InMemoryURLIndex::ShutDown() { | 162 void InMemoryURLIndex::ShutDown() { |
| 163 registrar_.RemoveAll(); | |
| 175 // Write our cache. | 164 // Write our cache. |
| 176 SaveToCacheFile(); | 165 SaveToCacheFile(); |
| 177 } | 166 } |
| 178 | 167 |
| 168 void InMemoryURLIndex::Observe(int type, | |
| 169 const NotificationSource& source, | |
| 170 const NotificationDetails& details) { | |
| 171 switch (type) { | |
| 172 case chrome::NOTIFICATION_HISTORY_URL_VISITED: | |
| 173 OnURLVisited(Details<URLVisitedDetails>(details).ptr()); | |
| 174 break; | |
| 175 case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED: | |
| 176 OnURLsModified(Details<history::URLsModifiedDetails>(details).ptr()); | |
| 177 break; | |
| 178 case chrome::NOTIFICATION_HISTORY_URLS_DELETED: | |
| 179 OnURLsDeleted(Details<history::URLsDeletedDetails>(details).ptr()); | |
| 180 break; | |
| 181 default: | |
| 182 // For simplicity, the unit tests send us all notifications, even when | |
| 183 // we haven't registered for them, so don't assert here. | |
| 184 break; | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) { | |
| 189 UpdateURL(details->row); | |
| 190 } | |
| 191 | |
| 192 void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) { | |
| 193 for (std::vector<history::URLRow>::const_iterator row = | |
| 194 details->changed_urls.begin(); | |
| 195 row != details->changed_urls.end(); ++row) | |
| 196 UpdateURL(*row); | |
| 197 } | |
| 198 | |
| 199 void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) { | |
| 200 if (details->all_history) { | |
| 201 ClearPrivateData(); | |
| 202 } else { | |
| 203 for (std::vector<URLRow>::const_iterator row = details->rows.begin(); | |
| 204 row != details->rows.end(); ++row) | |
| 205 DeleteURL(*row); | |
| 206 } | |
| 207 } | |
| 208 | |
| 179 bool InMemoryURLIndex::IndexRow(const URLRow& row) { | 209 bool InMemoryURLIndex::IndexRow(const URLRow& row) { |
| 180 const GURL& gurl(row.url()); | 210 const GURL& gurl(row.url()); |
| 181 | 211 |
| 182 // Index only URLs with a whitelisted scheme. | 212 // Index only URLs with a whitelisted scheme. |
| 183 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) | 213 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) |
| 184 return true; | 214 return true; |
| 185 | 215 |
| 216 URLID row_id = row.id(); | |
| 217 // Strip out username and password before saving and indexing. | |
| 186 string16 url(net::FormatUrl(gurl, languages_, | 218 string16 url(net::FormatUrl(gurl, languages_, |
| 187 net::kFormatUrlOmitUsernamePassword, | 219 net::kFormatUrlOmitUsernamePassword, |
| 188 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, | 220 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
| 189 NULL, NULL, NULL)); | 221 NULL, NULL, NULL)); |
| 190 | 222 HistoryID history_id = static_cast<HistoryID>(row_id); |
| 191 HistoryID history_id = static_cast<HistoryID>(row.id()); | 223 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
| 192 DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); | |
| 193 | 224 |
| 194 // Add the row for quick lookup in the history info store. | 225 // Add the row for quick lookup in the history info store. |
| 195 URLRow new_row(GURL(url), row.id()); | 226 URLRow new_row(GURL(url), row_id); |
| 196 new_row.set_visit_count(row.visit_count()); | 227 new_row.set_visit_count(row.visit_count()); |
| 197 new_row.set_typed_count(row.typed_count()); | 228 new_row.set_typed_count(row.typed_count()); |
| 198 new_row.set_last_visit(row.last_visit()); | 229 new_row.set_last_visit(row.last_visit()); |
| 199 new_row.set_title(row.title()); | 230 new_row.set_title(row.title()); |
| 200 history_info_map_[history_id] = new_row; | 231 private_data_->history_info_map_[history_id] = new_row; |
| 201 | 232 |
| 233 // Index the words contained in the URL and title of the row. | |
| 234 AddRowWordsToIndex(new_row); | |
| 235 ++(private_data_->history_item_count_); | |
| 236 return true; | |
| 237 } | |
| 238 | |
| 239 void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) { | |
| 240 HistoryID history_id = static_cast<HistoryID>(row.id()); | |
| 202 // Split URL into individual, unique words then add in the title words. | 241 // Split URL into individual, unique words then add in the title words. |
| 242 const GURL& gurl(row.url()); | |
| 243 string16 url(net::FormatUrl(gurl, languages_, | |
| 244 net::kFormatUrlOmitUsernamePassword, | |
| 245 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, | |
| 246 NULL, NULL, NULL)); | |
| 203 url = base::i18n::ToLower(url); | 247 url = base::i18n::ToLower(url); |
| 204 String16Set url_words = WordSetFromString16(url); | 248 String16Set url_words = String16SetFromString16(url); |
| 205 String16Set title_words = WordSetFromString16(row.title()); | 249 String16Set title_words = String16SetFromString16(row.title()); |
| 206 String16Set words; | 250 String16Set words; |
| 207 std::set_union(url_words.begin(), url_words.end(), | 251 std::set_union(url_words.begin(), url_words.end(), |
| 208 title_words.begin(), title_words.end(), | 252 title_words.begin(), title_words.end(), |
| 209 std::insert_iterator<String16Set>(words, words.begin())); | 253 std::insert_iterator<String16Set>(words, words.begin())); |
| 210 for (String16Set::iterator word_iter = words.begin(); | 254 for (String16Set::iterator word_iter = words.begin(); |
| 211 word_iter != words.end(); ++word_iter) | 255 word_iter != words.end(); ++word_iter) |
| 212 AddWordToIndex(*word_iter, history_id); | 256 AddWordToIndex(*word_iter, history_id); |
| 213 | 257 |
| 214 ++history_item_count_; | 258 search_term_cache_.clear(); // Invalidate the term cache. |
| 215 return true; | 259 } |
| 260 | |
| 261 void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) { | |
| 262 RemoveRowWordsFromIndex(row); | |
| 263 HistoryID history_id = static_cast<HistoryID>(row.id()); | |
| 264 private_data_->history_info_map_.erase(history_id); | |
| 265 } | |
| 266 | |
| 267 void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) { | |
| 268 // Remove the entries in history_id_word_map_ and word_id_history_map_ for | |
| 269 // this row. | |
| 270 URLIndexPrivateData& private_data(*(private_data_.get())); | |
| 271 HistoryID history_id = static_cast<HistoryID>(row.id()); | |
| 272 WordIDSet word_id_set = private_data.history_id_word_map_[history_id]; | |
| 273 private_data.history_id_word_map_.erase(history_id); | |
| 274 | |
| 275 // Reconcile any changes to word usage. | |
| 276 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | |
| 277 word_id_iter != word_id_set.end(); ++word_id_iter) { | |
| 278 WordID word_id = *word_id_iter; | |
| 279 HistoryIDSet history_ids = private_data.word_id_history_map_[word_id]; | |
|
Peter Kasting
2011/10/07 17:50:37
Why do you copy this out here, modify the copy, an
mrossetti
2011/10/07 22:24:37
Yeah, great catch! There used to be some more code
| |
| 280 history_ids.erase(history_id); | |
| 281 if (!history_ids.empty()) { | |
| 282 // The word is still in use. | |
| 283 private_data.word_id_history_map_[word_id] = history_ids; | |
| 284 continue; | |
| 285 } | |
| 286 | |
| 287 // The word is no longer in use. Reconcile any changes to character usage. | |
| 288 string16 word = private_data.word_list_[word_id]; | |
| 289 Char16Set characters = Char16SetFromString16(word); | |
| 290 for (Char16Set::iterator uni_char_iter = characters.begin(); | |
| 291 uni_char_iter != characters.end(); ++uni_char_iter) { | |
| 292 char16 uni_char = *uni_char_iter; | |
| 293 WordIDSet word_ids = private_data.char_word_map_[uni_char]; | |
|
Peter Kasting
2011/10/07 17:50:37
Same comment.
mrossetti
2011/10/07 22:24:37
Done.
| |
| 294 word_ids.erase(word_id); | |
| 295 if (!word_ids.empty()) { | |
| 296 // The character is still in use. | |
| 297 private_data.char_word_map_[uni_char] = word_ids; | |
| 298 continue; | |
| 299 } | |
| 300 // The character is no longer in use. | |
| 301 private_data.char_word_map_.erase(uni_char); | |
| 302 } | |
| 303 | |
| 304 // Complete the removal of references to the word. | |
| 305 private_data.word_id_history_map_.erase(word_id); | |
| 306 private_data.word_map_.erase(word); | |
| 307 private_data.word_list_[word_id] = string16(); | |
| 308 private_data.available_words_.insert(word_id); | |
| 309 } | |
| 216 } | 310 } |
| 217 | 311 |
| 218 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, | 312 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, |
| 219 bool clear_cache) { | 313 bool clear_cache) { |
| 220 ClearPrivateData(); | 314 ClearPrivateData(); |
| 221 | 315 |
| 222 if (!history_db) | 316 if (!history_db) |
| 223 return false; | 317 return false; |
| 224 | 318 |
| 225 if (clear_cache || !RestoreFromCacheFile()) { | 319 if (clear_cache || !RestoreFromCacheFile()) { |
| 226 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 320 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| 227 // The index has to be built from scratch. | 321 // The index has to be built from scratch. |
| 228 URLDatabase::URLEnumerator history_enum; | 322 URLDatabase::URLEnumerator history_enum; |
| 229 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 323 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
| 230 return false; | 324 return false; |
| 231 URLRow row; | 325 URLRow row; |
| 232 while (history_enum.GetNextURL(&row)) { | 326 while (history_enum.GetNextURL(&row)) { |
| 233 if (!IndexRow(row)) | 327 if (!IndexRow(row)) |
| 234 return false; | 328 return false; |
| 235 } | 329 } |
| 236 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 330 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
| 237 base::TimeTicks::Now() - beginning_time); | 331 base::TimeTicks::Now() - beginning_time); |
| 238 SaveToCacheFile(); | 332 SaveToCacheFile(); |
| 239 } | 333 } |
| 240 return true; | 334 return true; |
| 241 } | 335 } |
| 242 | 336 |
| 243 void InMemoryURLIndex::ClearPrivateData() { | 337 void InMemoryURLIndex::ClearPrivateData() { |
| 244 history_item_count_ = 0; | 338 private_data_->Clear(); |
| 245 word_list_.clear(); | |
| 246 word_map_.clear(); | |
| 247 char_word_map_.clear(); | |
| 248 word_id_history_map_.clear(); | |
| 249 history_info_map_.clear(); | |
| 250 search_term_cache_.clear(); | 339 search_term_cache_.clear(); |
| 251 } | 340 } |
| 252 | 341 |
| 253 bool InMemoryURLIndex::RestoreFromCacheFile() { | 342 bool InMemoryURLIndex::RestoreFromCacheFile() { |
| 254 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 343 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
| 255 // That is: ensure that the database has not been modified since the cache | 344 // That is: ensure that the database has not been modified since the cache |
| 256 // was last saved. DB file modification date is inadequate. There are no | 345 // was last saved. DB file modification date is inadequate. There are no |
| 257 // SQLite table checksums automatically stored. | 346 // SQLite table checksums automatically stored. |
| 258 // FIXME(mrossetti): Move File IO to another thread. | 347 // FIXME(mrossetti): Move File IO to another thread. |
| 259 base::ThreadRestrictions::ScopedAllowIO allow_io; | 348 base::ThreadRestrictions::ScopedAllowIO allow_io; |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 275 return false; | 364 return false; |
| 276 } | 365 } |
| 277 | 366 |
| 278 if (!RestorePrivateData(index_cache)) { | 367 if (!RestorePrivateData(index_cache)) { |
| 279 ClearPrivateData(); // Back to square one -- must build from scratch. | 368 ClearPrivateData(); // Back to square one -- must build from scratch. |
| 280 return false; | 369 return false; |
| 281 } | 370 } |
| 282 | 371 |
| 283 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", | 372 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
| 284 base::TimeTicks::Now() - beginning_time); | 373 base::TimeTicks::Now() - beginning_time); |
| 285 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); | 374 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| 375 private_data_->history_item_count_); | |
| 286 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); | 376 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
| 287 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); | 377 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
| 288 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); | 378 private_data_->word_map_.size()); |
| 379 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", | |
| 380 private_data_->char_word_map_.size()); | |
| 289 return true; | 381 return true; |
| 290 } | 382 } |
| 291 | 383 |
| 292 bool InMemoryURLIndex::SaveToCacheFile() { | 384 bool InMemoryURLIndex::SaveToCacheFile() { |
| 293 // TODO(mrossetti): Move File IO to another thread. | 385 // TODO(mrossetti): Move File IO to another thread. |
| 294 base::ThreadRestrictions::ScopedAllowIO allow_io; | 386 base::ThreadRestrictions::ScopedAllowIO allow_io; |
| 295 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 387 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| 296 InMemoryURLIndexCacheItem index_cache; | 388 InMemoryURLIndexCacheItem index_cache; |
| 297 SavePrivateData(&index_cache); | 389 SavePrivateData(&index_cache); |
| 298 std::string data; | 390 std::string data; |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 310 int size = data.size(); | 402 int size = data.size(); |
| 311 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { | 403 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { |
| 312 LOG(WARNING) << "Failed to write " << file_path.value(); | 404 LOG(WARNING) << "Failed to write " << file_path.value(); |
| 313 return false; | 405 return false; |
| 314 } | 406 } |
| 315 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", | 407 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", |
| 316 base::TimeTicks::Now() - beginning_time); | 408 base::TimeTicks::Now() - beginning_time); |
| 317 return true; | 409 return true; |
| 318 } | 410 } |
| 319 | 411 |
| 320 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { | 412 void InMemoryURLIndex::UpdateURL(const URLRow& row) { |
| 321 // The row may or may not already be in our index. If it is not already | 413 // The row may or may not already be in our index. If it is not already |
| 322 // indexed and it qualifies then it gets indexed. If it is already | 414 // indexed and it qualifies then it gets indexed. If it is already |
| 323 // indexed and still qualifies then it gets updated, otherwise it | 415 // indexed and still qualifies then it gets updated, otherwise it |
| 324 // is deleted from the index. | 416 // is deleted from the index. |
| 325 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); | 417 HistoryInfoMap::iterator row_pos = |
| 326 if (row_pos == history_info_map_.end()) { | 418 private_data_->history_info_map_.find(row.id()); |
| 419 if (row_pos == private_data_->history_info_map_.end()) { | |
| 327 // This new row should be indexed if it qualifies. | 420 // This new row should be indexed if it qualifies. |
| 328 if (RowQualifiesAsSignificant(row, base::Time())) | 421 if (RowQualifiesAsSignificant(row, base::Time())) |
| 329 IndexRow(row); | 422 IndexRow(row); |
| 330 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 423 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
| 331 // This indexed row still qualifies and will be re-indexed. | 424 // This indexed row still qualifies and will be re-indexed. |
| 332 // The url won't have changed but the title, visit count, etc. | 425 // The url won't have changed but the title, visit count, etc. |
| 333 // might have changed. | 426 // might have changed. |
| 334 URLRow& old_row = row_pos->second; | 427 URLRow& old_row = row_pos->second; |
| 335 old_row.set_visit_count(row.visit_count()); | 428 old_row.set_visit_count(row.visit_count()); |
| 336 old_row.set_typed_count(row.typed_count()); | 429 old_row.set_typed_count(row.typed_count()); |
| 337 old_row.set_last_visit(row.last_visit()); | 430 old_row.set_last_visit(row.last_visit()); |
| 338 // TODO(mrossetti): When we start indexing the title the next line | 431 // While the URL is guaranteed to remain stable, the title may have change. |
|
Peter Kasting
2011/10/07 17:50:37
Nit: change -> changed
mrossetti
2011/10/07 22:24:37
Done.
| |
| 339 // will need attention. | 432 // If so, then we need to update the index with the changed words. |
| 340 old_row.set_title(row.title()); | 433 if (old_row.title() != row.title()) { |
| 434 // Clear all words associated with this row and re-index both the | |
| 435 // URL and title. | |
| 436 RemoveRowWordsFromIndex(row); | |
| 437 old_row.set_title(row.title()); | |
| 438 AddRowWordsToIndex(old_row); | |
| 439 } | |
| 341 } else { | 440 } else { |
| 342 // This indexed row no longer qualifies and will be de-indexed. | 441 // This indexed row no longer qualifies and will be de-indexed by |
| 343 history_info_map_.erase(row_id); | 442 // clearing all words associated with this row. |
| 443 RemoveRowFromIndex(row); | |
| 344 } | 444 } |
| 345 // This invalidates the cache. | 445 // This invalidates the cache. |
| 346 search_term_cache_.clear(); | 446 search_term_cache_.clear(); |
| 347 // TODO(mrossetti): Record this transaction in the cache. | |
| 348 } | 447 } |
| 349 | 448 |
| 350 void InMemoryURLIndex::DeleteURL(URLID row_id) { | 449 void InMemoryURLIndex::DeleteURL(const URLRow& row) { |
| 351 // Note that this does not remove any reference to this row from the | 450 RemoveRowFromIndex(row); |
| 352 // word_id_history_map_. That map will continue to contain (and return) | |
| 353 // hits against this row until that map is rebuilt, but since the | |
| 354 // history_info_map_ no longer references the row no erroneous results | |
| 355 // will propagate to the user. | |
| 356 history_info_map_.erase(row_id); | |
| 357 // This invalidates the word cache. | 451 // This invalidates the word cache. |
| 358 search_term_cache_.clear(); | 452 search_term_cache_.clear(); |
| 359 // TODO(mrossetti): Record this transaction in the cache. | 453 // TODO(mrossetti): Record this transaction in the cache. |
| 360 } | 454 } |
| 361 | 455 |
| 362 // Searching | 456 // Searching |
| 363 | 457 |
| 364 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( | 458 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( |
| 365 const String16Vector& terms) { | 459 const String16Vector& terms) { |
| 366 ScoredHistoryMatches scored_items; | 460 ScoredHistoryMatches scored_items; |
| 461 | |
| 462 // Do nothing if we have indexed no words (probably because we've not been | |
| 463 // initialized yet). | |
| 464 if (private_data_->word_list_.empty()) | |
| 465 return scored_items; | |
| 466 | |
| 367 if (!terms.empty()) { | 467 if (!terms.empty()) { |
| 368 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep | 468 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
| 369 // approach. | 469 // approach. |
| 370 ResetSearchTermCache(); | 470 ResetSearchTermCache(); |
| 371 | 471 |
| 372 // Lowercase the terms. | 472 // Lowercase the terms. |
| 373 // TODO(mrossetti): Another opportunity for a transform algorithm. | 473 // TODO(mrossetti): Another opportunity for a transform algorithm. |
| 374 String16Vector lower_terms; | 474 String16Vector lower_terms; |
| 375 for (String16Vector::const_iterator term_iter = terms.begin(); | 475 for (String16Vector::const_iterator term_iter = terms.begin(); |
| 376 term_iter != terms.end(); ++term_iter) | 476 term_iter != terms.end(); ++term_iter) |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 414 | 514 |
| 415 return scored_items; | 515 return scored_items; |
| 416 } | 516 } |
| 417 | 517 |
| 418 void InMemoryURLIndex::ResetSearchTermCache() { | 518 void InMemoryURLIndex::ResetSearchTermCache() { |
| 419 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); | 519 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); |
| 420 iter != search_term_cache_.end(); ++iter) | 520 iter != search_term_cache_.end(); ++iter) |
| 421 iter->second.used_ = false; | 521 iter->second.used_ = false; |
| 422 } | 522 } |
| 423 | 523 |
| 424 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( | 524 HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( |
| 425 const string16& uni_string) { | 525 const string16& uni_string) { |
| 426 // Break the terms down into individual terms (words), get the candidate | 526 // Break the terms down into individual terms (words), get the candidate |
| 427 // set for each term, and intersect each to get a final candidate list. | 527 // set for each term, and intersect each to get a final candidate list. |
| 428 // Note that a single 'term' from the user's perspective might be | 528 // Note that a single 'term' from the user's perspective might be |
| 429 // a string like "http://www.somewebsite.com" which, from our perspective, | 529 // a string like "http://www.somewebsite.com" which, from our perspective, |
| 430 // is four words: 'http', 'www', 'somewebsite', and 'com'. | 530 // is four words: 'http', 'www', 'somewebsite', and 'com'. |
| 431 HistoryIDSet history_id_set; | 531 HistoryIDSet history_id_set; |
| 432 String16Vector terms = WordVectorFromString16(uni_string, true); | 532 String16Vector terms = String16VectorFromString16(uni_string, true); |
| 433 // Sort the terms into the longest first as such are likely to narrow down | 533 // Sort the terms into the longest first as such are likely to narrow down |
| 434 // the results quicker. Also, single character terms are the most expensive | 534 // the results quicker. Also, single character terms are the most expensive |
| 435 // to process so save them for last. | 535 // to process so save them for last. |
| 436 std::sort(terms.begin(), terms.end(), LengthGreater); | 536 std::sort(terms.begin(), terms.end(), LengthGreater); |
| 437 for (String16Vector::iterator iter = terms.begin(); iter != terms.end(); | 537 for (String16Vector::iterator iter = terms.begin(); iter != terms.end(); |
| 438 ++iter) { | 538 ++iter) { |
| 439 string16 uni_word = *iter; | 539 string16 uni_word = *iter; |
| 440 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); | 540 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); |
| 441 if (term_history_set.empty()) { | 541 if (term_history_set.empty()) { |
| 442 history_id_set.clear(); | 542 history_id_set.clear(); |
| 443 break; | 543 break; |
| 444 } | 544 } |
| 445 if (iter == terms.begin()) { | 545 if (iter == terms.begin()) { |
| 446 history_id_set.swap(term_history_set); | 546 history_id_set.swap(term_history_set); |
| 447 } else { | 547 } else { |
| 448 HistoryIDSet new_history_id_set; | 548 HistoryIDSet new_history_id_set; |
| 449 std::set_intersection(history_id_set.begin(), history_id_set.end(), | 549 std::set_intersection(history_id_set.begin(), history_id_set.end(), |
| 450 term_history_set.begin(), term_history_set.end(), | 550 term_history_set.begin(), term_history_set.end(), |
| 451 std::inserter(new_history_id_set, | 551 std::inserter(new_history_id_set, |
| 452 new_history_id_set.begin())); | 552 new_history_id_set.begin())); |
| 453 history_id_set.swap(new_history_id_set); | 553 history_id_set.swap(new_history_id_set); |
| 454 } | 554 } |
| 455 } | 555 } |
| 456 return history_id_set; | 556 return history_id_set; |
| 457 } | 557 } |
| 458 | 558 |
| 459 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( | 559 HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( |
| 460 const string16& term) { | 560 const string16& term) { |
| 461 if (term.empty()) | 561 if (term.empty()) |
| 462 return HistoryIDSet(); | 562 return HistoryIDSet(); |
| 463 | 563 |
| 464 // TODO(mrossetti): Consider optimizing for very common terms such as | 564 // TODO(mrossetti): Consider optimizing for very common terms such as |
| 465 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently | 565 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently |
| 466 // occuring words in the user's searches. | 566 // occuring words in the user's searches. |
| 467 | 567 |
| 468 size_t term_length = term.length(); | 568 size_t term_length = term.length(); |
| 469 InMemoryURLIndex::WordIDSet word_id_set; | 569 WordIDSet word_id_set; |
| 470 if (term_length > 1) { | 570 if (term_length > 1) { |
| 471 // See if this term or a prefix thereof is present in the cache. | 571 // See if this term or a prefix thereof is present in the cache. |
| 472 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); | 572 SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); |
| 473 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); | 573 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); |
| 474 cache_iter != search_term_cache_.end(); ++cache_iter) { | 574 cache_iter != search_term_cache_.end(); ++cache_iter) { |
| 475 if (StartsWith(term, cache_iter->first, false) && | 575 if (StartsWith(term, cache_iter->first, false) && |
| 476 (best_prefix == search_term_cache_.end() || | 576 (best_prefix == search_term_cache_.end() || |
| 477 cache_iter->first.length() > best_prefix->first.length())) | 577 cache_iter->first.length() > best_prefix->first.length())) |
| 478 best_prefix = cache_iter; | 578 best_prefix = cache_iter; |
| 479 } | 579 } |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 505 | 605 |
| 506 // Filter for each remaining, unique character in the term. | 606 // Filter for each remaining, unique character in the term. |
| 507 Char16Set leftover_chars = Char16SetFromString16(leftovers); | 607 Char16Set leftover_chars = Char16SetFromString16(leftovers); |
| 508 Char16Set unique_chars; | 608 Char16Set unique_chars; |
| 509 std::set_difference(leftover_chars.begin(), leftover_chars.end(), | 609 std::set_difference(leftover_chars.begin(), leftover_chars.end(), |
| 510 prefix_chars.begin(), prefix_chars.end(), | 610 prefix_chars.begin(), prefix_chars.end(), |
| 511 std::inserter(unique_chars, unique_chars.begin())); | 611 std::inserter(unique_chars, unique_chars.begin())); |
| 512 | 612 |
| 513 // Reduce the word set with any leftover, unprocessed characters. | 613 // Reduce the word set with any leftover, unprocessed characters. |
| 514 if (!unique_chars.empty()) { | 614 if (!unique_chars.empty()) { |
| 515 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); | 615 WordIDSet leftover_set( |
| 616 private_data_->WordIDSetForTermChars(unique_chars)); | |
| 516 // We might come up empty on the leftovers. | 617 // We might come up empty on the leftovers. |
| 517 if (leftover_set.empty()) { | 618 if (leftover_set.empty()) { |
| 518 search_term_cache_[term] = SearchTermCacheItem(); | 619 search_term_cache_[term] = SearchTermCacheItem(); |
| 519 return HistoryIDSet(); | 620 return HistoryIDSet(); |
| 520 } | 621 } |
| 521 // Or there may not have been a prefix from which to start. | 622 // Or there may not have been a prefix from which to start. |
| 522 if (prefix_chars.empty()) { | 623 if (prefix_chars.empty()) { |
| 523 word_id_set.swap(leftover_set); | 624 word_id_set.swap(leftover_set); |
| 524 } else { | 625 } else { |
| 525 WordIDSet new_word_id_set; | 626 WordIDSet new_word_id_set; |
| 526 std::set_intersection(word_id_set.begin(), word_id_set.end(), | 627 std::set_intersection(word_id_set.begin(), word_id_set.end(), |
| 527 leftover_set.begin(), leftover_set.end(), | 628 leftover_set.begin(), leftover_set.end(), |
| 528 std::inserter(new_word_id_set, | 629 std::inserter(new_word_id_set, |
| 529 new_word_id_set.begin())); | 630 new_word_id_set.begin())); |
| 530 word_id_set.swap(new_word_id_set); | 631 word_id_set.swap(new_word_id_set); |
| 531 } | 632 } |
| 532 } | 633 } |
| 533 | 634 |
| 534 // We must filter the word list because the resulting word set surely | 635 // We must filter the word list because the resulting word set surely |
| 535 // contains words which do not have the search term as a proper subset. | 636 // contains words which do not have the search term as a proper subset. |
| 536 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); | 637 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); |
| 537 word_set_iter != word_id_set.end(); ) { | 638 word_set_iter != word_id_set.end(); ) { |
| 538 if (word_list_[*word_set_iter].find(term) == string16::npos) | 639 if (private_data_->word_list_[*word_set_iter].find(term) == |
| 640 string16::npos) | |
| 539 word_id_set.erase(word_set_iter++); | 641 word_id_set.erase(word_set_iter++); |
| 540 else | 642 else |
| 541 ++word_set_iter; | 643 ++word_set_iter; |
| 542 } | 644 } |
| 543 } else { | 645 } else { |
| 544 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); | 646 word_id_set = |
| 647 private_data_->WordIDSetForTermChars(Char16SetFromString16(term)); | |
| 545 } | 648 } |
| 546 | 649 |
| 547 // If any words resulted then we can compose a set of history IDs by unioning | 650 // If any words resulted then we can compose a set of history IDs by unioning |
| 548 // the sets from each word. | 651 // the sets from each word. |
| 549 HistoryIDSet history_id_set; | 652 HistoryIDSet history_id_set; |
| 550 if (!word_id_set.empty()) { | 653 if (!word_id_set.empty()) { |
| 551 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 654 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); |
| 552 word_id_iter != word_id_set.end(); ++word_id_iter) { | 655 word_id_iter != word_id_set.end(); ++word_id_iter) { |
| 553 WordID word_id = *word_id_iter; | 656 WordID word_id = *word_id_iter; |
| 554 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); | 657 WordIDHistoryMap::iterator word_iter = |
| 555 if (word_iter != word_id_history_map_.end()) { | 658 private_data_->word_id_history_map_.find(word_id); |
| 659 if (word_iter != private_data_->word_id_history_map_.end()) { | |
| 556 HistoryIDSet& word_history_id_set(word_iter->second); | 660 HistoryIDSet& word_history_id_set(word_iter->second); |
| 557 history_id_set.insert(word_history_id_set.begin(), | 661 history_id_set.insert(word_history_id_set.begin(), |
| 558 word_history_id_set.end()); | 662 word_history_id_set.end()); |
| 559 } | 663 } |
| 560 } | 664 } |
| 561 } | 665 } |
| 562 | 666 |
| 563 // Record a new cache entry for this word if the term is longer than | 667 // Record a new cache entry for this word if the term is longer than |
| 564 // a single character. | 668 // a single character. |
| 565 if (term_length > 1) | 669 if (term_length > 1) |
| 566 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); | 670 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); |
| 567 | 671 |
| 568 return history_id_set; | 672 return history_id_set; |
| 569 } | 673 } |
| 570 | 674 |
| 571 // Utility Functions | 675 // Utility Functions |
| 572 | 676 |
| 573 // static | |
| 574 InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( | |
| 575 const string16& uni_string) { | |
| 576 const size_t kMaxWordLength = 64; | |
| 577 String16Vector words = WordVectorFromString16(uni_string, false); | |
| 578 String16Set word_set; | |
| 579 for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); | |
| 580 ++iter) | |
| 581 word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); | |
| 582 return word_set; | |
| 583 } | |
| 584 | |
| 585 // static | |
| 586 InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( | |
| 587 const string16& uni_string, | |
| 588 bool break_on_space) { | |
| 589 base::i18n::BreakIterator iter( | |
| 590 uni_string, | |
| 591 break_on_space ? base::i18n::BreakIterator::BREAK_SPACE | |
| 592 : base::i18n::BreakIterator::BREAK_WORD); | |
| 593 String16Vector words; | |
| 594 if (!iter.Init()) | |
| 595 return words; | |
| 596 while (iter.Advance()) { | |
| 597 if (break_on_space || iter.IsWord()) { | |
| 598 string16 word = iter.GetString(); | |
| 599 if (break_on_space) | |
| 600 TrimWhitespace(word, TRIM_ALL, &word); | |
| 601 if (!word.empty()) | |
| 602 words.push_back(word); | |
| 603 } | |
| 604 } | |
| 605 return words; | |
| 606 } | |
| 607 | |
| 608 // static | |
| 609 InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( | |
| 610 const string16& term) { | |
| 611 Char16Set characters; | |
| 612 for (string16::const_iterator iter = term.begin(); iter != term.end(); | |
| 613 ++iter) | |
| 614 characters.insert(*iter); | |
| 615 return characters; | |
| 616 } | |
| 617 | |
| 618 void InMemoryURLIndex::AddWordToIndex(const string16& term, | 677 void InMemoryURLIndex::AddWordToIndex(const string16& term, |
| 619 HistoryID history_id) { | 678 HistoryID history_id) { |
| 620 WordMap::iterator word_pos = word_map_.find(term); | 679 WordMap::iterator word_pos = private_data_->word_map_.find(term); |
| 621 if (word_pos != word_map_.end()) | 680 if (word_pos != private_data_->word_map_.end()) |
| 622 UpdateWordHistory(word_pos->second, history_id); | 681 UpdateWordHistory(word_pos->second, history_id); |
| 623 else | 682 else |
| 624 AddWordHistory(term, history_id); | 683 AddWordHistory(term, history_id); |
| 625 } | 684 } |
| 626 | 685 |
| 627 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { | 686 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { |
| 628 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); | 687 WordIDHistoryMap::iterator history_pos = |
| 629 DCHECK(history_pos != word_id_history_map_.end()); | 688 private_data_->word_id_history_map_.find(word_id); |
| 630 HistoryIDSet& history_id_set(history_pos->second); | 689 DCHECK(history_pos != private_data_->word_id_history_map_.end()); |
| 631 history_id_set.insert(history_id); | 690 HistoryIDSet& history_id_set(history_pos->second); |
| 691 history_id_set.insert(history_id); | |
| 692 private_data_->AddToHistoryIDWordMap(history_id, word_id); | |
| 632 } | 693 } |
| 633 | 694 |
| 634 // Add a new word to the word list and the word map, and then create a | 695 // Add a new word to the word list and the word map, and then create a |
| 635 // new entry in the word/history map. | 696 // new entry in the word/history map. |
| 636 void InMemoryURLIndex::AddWordHistory(const string16& term, | 697 void InMemoryURLIndex::AddWordHistory(const string16& term, |
| 637 HistoryID history_id) { | 698 HistoryID history_id) { |
| 638 word_list_.push_back(term); | 699 URLIndexPrivateData& private_data(*(private_data_.get())); |
| 639 WordID word_id = word_list_.size() - 1; | 700 WordID word_id = private_data.word_list_.size(); |
| 640 word_map_[term] = word_id; | 701 if (private_data.available_words_.empty()) { |
| 702 private_data.word_list_.push_back(term); | |
| 703 } else { | |
| 704 word_id = *(private_data.available_words_.begin()); | |
| 705 private_data.word_list_[word_id] = term; | |
| 706 private_data.available_words_.erase(word_id); | |
| 707 } | |
| 708 private_data.word_map_[term] = word_id; | |
| 709 | |
| 641 HistoryIDSet history_id_set; | 710 HistoryIDSet history_id_set; |
| 642 history_id_set.insert(history_id); | 711 history_id_set.insert(history_id); |
| 643 word_id_history_map_[word_id] = history_id_set; | 712 private_data.word_id_history_map_[word_id] = history_id_set; |
| 713 private_data_->AddToHistoryIDWordMap(history_id, word_id); | |
| 714 | |
| 644 // For each character in the newly added word (i.e. a word that is not | 715 // For each character in the newly added word (i.e. a word that is not |
| 645 // already in the word index), add the word to the character index. | 716 // already in the word index), add the word to the character index. |
| 646 Char16Set characters = Char16SetFromString16(term); | 717 Char16Set characters = Char16SetFromString16(term); |
| 647 for (Char16Set::iterator uni_char_iter = characters.begin(); | 718 for (Char16Set::iterator uni_char_iter = characters.begin(); |
| 648 uni_char_iter != characters.end(); ++uni_char_iter) { | 719 uni_char_iter != characters.end(); ++uni_char_iter) { |
| 649 char16 uni_char = *uni_char_iter; | 720 char16 uni_char = *uni_char_iter; |
| 650 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); | 721 CharWordIDMap::iterator char_iter = |
| 651 if (char_iter != char_word_map_.end()) { | 722 private_data.char_word_map_.find(uni_char); |
| 723 if (char_iter != private_data.char_word_map_.end()) { | |
| 652 // Update existing entry in the char/word index. | 724 // Update existing entry in the char/word index. |
| 653 WordIDSet& word_id_set(char_iter->second); | 725 WordIDSet& word_id_set(char_iter->second); |
| 654 word_id_set.insert(word_id); | 726 word_id_set.insert(word_id); |
| 655 } else { | 727 } else { |
| 656 // Create a new entry in the char/word index. | 728 // Create a new entry in the char/word index. |
| 657 WordIDSet word_id_set; | 729 WordIDSet word_id_set; |
| 658 word_id_set.insert(word_id); | 730 word_id_set.insert(word_id); |
| 659 char_word_map_[uni_char] = word_id_set; | 731 private_data.char_word_map_[uni_char] = word_id_set; |
| 660 } | 732 } |
| 661 } | 733 } |
| 662 } | 734 } |
| 663 | 735 |
| 664 InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( | |
| 665 const Char16Set& term_chars) { | |
| 666 WordIDSet word_id_set; | |
| 667 for (Char16Set::const_iterator c_iter = term_chars.begin(); | |
| 668 c_iter != term_chars.end(); ++c_iter) { | |
| 669 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); | |
| 670 if (char_iter == char_word_map_.end()) { | |
| 671 // A character was not found so there are no matching results: bail. | |
| 672 word_id_set.clear(); | |
| 673 break; | |
| 674 } | |
| 675 WordIDSet& char_word_id_set(char_iter->second); | |
| 676 // It is possible for there to no longer be any words associated with | |
| 677 // a particular character. Give up in that case. | |
| 678 if (char_word_id_set.empty()) { | |
| 679 word_id_set.clear(); | |
| 680 break; | |
| 681 } | |
| 682 | |
| 683 if (c_iter == term_chars.begin()) { | |
| 684 // First character results becomes base set of results. | |
| 685 word_id_set = char_word_id_set; | |
| 686 } else { | |
| 687 // Subsequent character results get intersected in. | |
| 688 WordIDSet new_word_id_set; | |
| 689 std::set_intersection(word_id_set.begin(), word_id_set.end(), | |
| 690 char_word_id_set.begin(), char_word_id_set.end(), | |
| 691 std::inserter(new_word_id_set, | |
| 692 new_word_id_set.begin())); | |
| 693 word_id_set.swap(new_word_id_set); | |
| 694 } | |
| 695 } | |
| 696 return word_id_set; | |
| 697 } | |
| 698 | |
| 699 // static | 736 // static |
| 700 TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, | 737 // TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. |
| 701 const string16& string, | |
| 702 int term_num) { | |
| 703 const size_t kMaxCompareLength = 2048; | |
| 704 const string16& short_string = (string.length() > kMaxCompareLength) ? | |
| 705 string.substr(0, kMaxCompareLength) : string; | |
| 706 TermMatches matches; | |
| 707 for (size_t location = short_string.find(term); location != string16::npos; | |
| 708 location = short_string.find(term, location + 1)) { | |
| 709 matches.push_back(TermMatch(term_num, location, term.length())); | |
| 710 } | |
| 711 return matches; | |
| 712 } | |
| 713 | |
| 714 // static | |
| 715 TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { | |
| 716 if (matches.empty()) | |
| 717 return matches; | |
| 718 TermMatches sorted_matches = matches; | |
| 719 std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); | |
| 720 TermMatches clean_matches; | |
| 721 TermMatch last_match = sorted_matches[0]; | |
| 722 clean_matches.push_back(last_match); | |
| 723 for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; | |
| 724 iter != sorted_matches.end(); ++iter) { | |
| 725 if (iter->offset >= last_match.offset + last_match.length) { | |
| 726 last_match = *iter; | |
| 727 clean_matches.push_back(last_match); | |
| 728 } | |
| 729 } | |
| 730 return clean_matches; | |
| 731 } | |
| 732 | |
| 733 // static | |
| 734 std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( | |
| 735 const TermMatches& matches) { | |
| 736 std::vector<size_t> offsets; | |
| 737 for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) | |
| 738 offsets.push_back(i->offset); | |
| 739 return offsets; | |
| 740 } | |
| 741 | |
| 742 // static | |
| 743 TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( | |
| 744 const TermMatches& matches, | |
| 745 const std::vector<size_t>& offsets) { | |
| 746 DCHECK_EQ(matches.size(), offsets.size()); | |
| 747 TermMatches new_matches; | |
| 748 std::vector<size_t>::const_iterator offset_iter = offsets.begin(); | |
| 749 for (TermMatches::const_iterator term_iter = matches.begin(); | |
| 750 term_iter != matches.end(); ++term_iter, ++offset_iter) { | |
| 751 if (*offset_iter != string16::npos) { | |
| 752 TermMatch new_match(*term_iter); | |
| 753 new_match.offset = *offset_iter; | |
| 754 new_matches.push_back(new_match); | |
| 755 } | |
| 756 } | |
| 757 return new_matches; | |
| 758 } | |
| 759 | |
| 760 // static | |
| 761 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( | 738 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( |
| 762 const URLRow& row, | 739 const URLRow& row, |
| 763 const String16Vector& terms) { | 740 const String16Vector& terms) { |
| 764 ScoredHistoryMatch match(row); | 741 ScoredHistoryMatch match(row); |
| 765 GURL gurl = row.url(); | 742 GURL gurl = row.url(); |
| 766 if (!gurl.is_valid()) | 743 if (!gurl.is_valid()) |
| 767 return match; | 744 return match; |
| 768 | 745 |
| 769 // Figure out where each search term appears in the URL and/or page title | 746 // Figure out where each search term appears in the URL and/or page title |
| 770 // so that we can score as well as provide autocomplete highlighting. | 747 // so that we can score as well as provide autocomplete highlighting. |
| 771 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); | 748 string16 url = base::i18n::ToLower(UTF8ToUTF16(gurl.spec())); |
| 772 string16 title = base::i18n::ToLower(row.title()); | 749 string16 title = base::i18n::ToLower(row.title()); |
| 773 int term_num = 0; | 750 int term_num = 0; |
| 774 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); | 751 for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); |
| 775 ++iter, ++term_num) { | 752 ++iter, ++term_num) { |
| 776 string16 term = *iter; | 753 string16 term = *iter; |
| 777 TermMatches url_term_matches = MatchTermInString(term, url, term_num); | 754 TermMatches url_term_matches = MatchTermInString(term, url, term_num); |
| 778 TermMatches title_term_matches = MatchTermInString(term, title, term_num); | 755 TermMatches title_term_matches = MatchTermInString(term, title, term_num); |
| 779 if (url_term_matches.empty() && title_term_matches.empty()) | 756 if (url_term_matches.empty() && title_term_matches.empty()) |
| 780 return match; // A term was not found in either URL or title - reject. | 757 return match; // A term was not found in either URL or title - reject. |
| 781 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), | 758 match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), |
| 782 url_term_matches.end()); | 759 url_term_matches.end()); |
| 783 match.title_matches.insert(match.title_matches.end(), | 760 match.title_matches.insert(match.title_matches.end(), |
| 784 title_term_matches.begin(), | 761 title_term_matches.begin(), |
| 785 title_term_matches.end()); | 762 title_term_matches.end()); |
| 786 } | 763 } |
| 787 | 764 |
| 788 // Sort matches by offset and eliminate any which overlap. | 765 // Sort matches by offset and eliminate any which overlap. |
| 789 match.url_matches = SortAndDeoverlap(match.url_matches); | 766 match.url_matches = SortAndDeoverlapMatches(match.url_matches); |
| 790 match.title_matches = SortAndDeoverlap(match.title_matches); | 767 match.title_matches = SortAndDeoverlapMatches(match.title_matches); |
| 791 | 768 |
| 792 // We should not (currently) inline autocomplete a result unless both of the | 769 // We should not (currently) inline autocomplete a result unless both of the |
| 793 // following are true: | 770 // following are true: |
| 794 // * There is exactly one substring matches in the URL, and | 771 // * There is exactly one substring matches in the URL, and |
| 795 // * The one URL match starts at the beginning of the URL. | 772 // * The one URL match starts at the beginning of the URL. |
| 796 match.can_inline = | 773 match.can_inline = |
| 797 match.url_matches.size() == 1 && match.url_matches[0].offset == 0; | 774 match.url_matches.size() == 1 && match.url_matches[0].offset == 0; |
| 798 | 775 |
| 799 // Get partial scores based on term matching. Note that the score for | 776 // Get partial scores based on term matching. Note that the score for |
| 800 // each of the URL and title are adjusted by the fraction of the | 777 // each of the URL and title are adjusted by the fraction of the |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 832 int factor[] = {term_score, days_ago_value, visit_count_value, | 809 int factor[] = {term_score, days_ago_value, visit_count_value, |
| 833 visit_count_value, typed_count_value, typed_count_value, | 810 visit_count_value, typed_count_value, typed_count_value, |
| 834 typed_count_value}; | 811 typed_count_value}; |
| 835 const int kInsignificantFactors = 2; | 812 const int kInsignificantFactors = 2; |
| 836 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors; | 813 const int kSignificantFactors = arraysize(factor) - kInsignificantFactors; |
| 837 std::partial_sort(factor, factor + kSignificantFactors, | 814 std::partial_sort(factor, factor + kSignificantFactors, |
| 838 factor + arraysize(factor), std::greater<int>()); | 815 factor + arraysize(factor), std::greater<int>()); |
| 839 for (int i = 0; i < kSignificantFactors; ++i) | 816 for (int i = 0; i < kSignificantFactors; ++i) |
| 840 match.raw_score += factor[i]; | 817 match.raw_score += factor[i]; |
| 841 match.raw_score /= kSignificantFactors; | 818 match.raw_score /= kSignificantFactors; |
| 842 | |
| 843 return match; | 819 return match; |
| 844 } | 820 } |
| 845 | 821 |
| 846 int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches, | 822 int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches, |
| 847 size_t max_length) { | 823 size_t max_length) { |
| 848 // TODO(mrossetti): This is good enough for now but must be fine-tuned. | 824 // TODO(mrossetti): This is good enough for now but must be fine-tuned. |
| 849 if (matches.empty()) | 825 if (matches.empty()) |
| 850 return 0; | 826 return 0; |
| 851 | 827 |
| 852 // Score component for whether the input terms (if more than one) were found | 828 // Score component for whether the input terms (if more than one) were found |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 894 | 870 |
| 895 // Scale the sum of the three components above into a single score component | 871 // Scale the sum of the three components above into a single score component |
| 896 // on the same scale as that used in ScoredMatchForURL(). | 872 // on the same scale as that used in ScoredMatchForURL(). |
| 897 return ScoreForValue(raw_score, kTermScoreLevel); | 873 return ScoreForValue(raw_score, kTermScoreLevel); |
| 898 } | 874 } |
| 899 | 875 |
| 900 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( | 876 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( |
| 901 const InMemoryURLIndex& index, | 877 const InMemoryURLIndex& index, |
| 902 const String16Vector& lower_terms) | 878 const String16Vector& lower_terms) |
| 903 : index_(index), | 879 : index_(index), |
| 904 lower_terms_(lower_terms) { | 880 lower_terms_(lower_terms) {} |
| 905 } | |
| 906 | 881 |
| 907 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} | 882 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} |
| 908 | 883 |
| 909 void InMemoryURLIndex::AddHistoryMatch::operator()( | 884 void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) { |
| 910 const InMemoryURLIndex::HistoryID history_id) { | |
| 911 HistoryInfoMap::const_iterator hist_pos = | 885 HistoryInfoMap::const_iterator hist_pos = |
| 912 index_.history_info_map_.find(history_id); | 886 index_.private_data_->history_info_map_.find(history_id); |
| 913 // Note that a history_id may be present in the word_id_history_map_ yet not | 887 // Note that a history_id may be present in the word_id_history_map_ yet not |
| 914 // be found in the history_info_map_. This occurs when an item has been | 888 // be found in the history_info_map_. This occurs when an item has been |
| 915 // deleted by the user or the item no longer qualifies as a quick result. | 889 // deleted by the user or the item no longer qualifies as a quick result. |
| 916 if (hist_pos != index_.history_info_map_.end()) { | 890 if (hist_pos != index_.private_data_->history_info_map_.end()) { |
| 917 const URLRow& hist_item = hist_pos->second; | 891 const URLRow& hist_item = hist_pos->second; |
| 918 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); | 892 ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); |
| 919 if (match.raw_score > 0) | 893 if (match.raw_score > 0) |
| 920 scored_matches_.push_back(match); | 894 scored_matches_.push_back(match); |
| 921 } | 895 } |
| 922 } | 896 } |
| 923 | 897 |
| 924 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { | 898 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { |
| 925 if (history_dir_.empty()) | 899 if (history_dir_.empty()) |
| 926 return false; | 900 return false; |
| 927 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); | 901 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); |
| 928 return true; | 902 return true; |
| 929 } | 903 } |
| 930 | 904 |
| 931 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { | 905 bool InMemoryURLIndex::URLSchemeIsWhitelisted(const GURL& gurl) const { |
| 932 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); | 906 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); |
| 933 } | 907 } |
| 934 | 908 |
| 935 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { | 909 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { |
| 936 DCHECK(cache); | 910 DCHECK(cache); |
| 937 cache->set_timestamp(base::Time::Now().ToInternalValue()); | 911 cache->set_timestamp(base::Time::Now().ToInternalValue()); |
| 938 cache->set_history_item_count(history_item_count_); | 912 cache->set_history_item_count(private_data_->history_item_count_); |
| 939 SaveWordList(cache); | 913 SaveWordList(cache); |
| 940 SaveWordMap(cache); | 914 SaveWordMap(cache); |
| 941 SaveCharWordMap(cache); | 915 SaveCharWordMap(cache); |
| 942 SaveWordIDHistoryMap(cache); | 916 SaveWordIDHistoryMap(cache); |
| 943 SaveHistoryInfoMap(cache); | 917 SaveHistoryInfoMap(cache); |
| 944 } | 918 } |
| 945 | 919 |
| 946 bool InMemoryURLIndex::RestorePrivateData( | 920 bool InMemoryURLIndex::RestorePrivateData( |
| 947 const InMemoryURLIndexCacheItem& cache) { | 921 const InMemoryURLIndexCacheItem& cache) { |
| 948 last_saved_ = base::Time::FromInternalValue(cache.timestamp()); | 922 last_saved_ = base::Time::FromInternalValue(cache.timestamp()); |
| 949 history_item_count_ = cache.history_item_count(); | 923 private_data_->history_item_count_ = cache.history_item_count(); |
| 950 return (history_item_count_ == 0) || (RestoreWordList(cache) && | 924 return (private_data_->history_item_count_ == 0) || (RestoreWordList(cache) && |
| 951 RestoreWordMap(cache) && RestoreCharWordMap(cache) && | 925 RestoreWordMap(cache) && RestoreCharWordMap(cache) && |
| 952 RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); | 926 RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); |
| 953 } | 927 } |
| 954 | 928 |
| 955 | |
| 956 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { | 929 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
| 957 if (word_list_.empty()) | 930 if (private_data_->word_list_.empty()) |
| 958 return; | 931 return; |
| 959 WordListItem* list_item = cache->mutable_word_list(); | 932 WordListItem* list_item = cache->mutable_word_list(); |
| 960 list_item->set_word_count(word_list_.size()); | 933 list_item->set_word_count(private_data_->word_list_.size()); |
| 961 for (String16Vector::const_iterator iter = word_list_.begin(); | 934 for (String16Vector::const_iterator iter = private_data_->word_list_.begin(); |
| 962 iter != word_list_.end(); ++iter) | 935 iter != private_data_->word_list_.end(); ++iter) |
| 963 list_item->add_word(UTF16ToUTF8(*iter)); | 936 list_item->add_word(UTF16ToUTF8(*iter)); |
| 964 } | 937 } |
| 965 | 938 |
| 966 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { | 939 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { |
| 967 if (!cache.has_word_list()) | 940 if (!cache.has_word_list()) |
| 968 return false; | 941 return false; |
| 969 const WordListItem& list_item(cache.word_list()); | 942 const WordListItem& list_item(cache.word_list()); |
| 970 uint32 expected_item_count = list_item.word_count(); | 943 uint32 expected_item_count = list_item.word_count(); |
| 971 uint32 actual_item_count = list_item.word_size(); | 944 uint32 actual_item_count = list_item.word_size(); |
| 972 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 945 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
| 973 return false; | 946 return false; |
| 974 const RepeatedPtrField<std::string>& words(list_item.word()); | 947 const RepeatedPtrField<std::string>& words(list_item.word()); |
| 975 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); | 948 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); |
| 976 iter != words.end(); ++iter) | 949 iter != words.end(); ++iter) |
| 977 word_list_.push_back(UTF8ToUTF16(*iter)); | 950 private_data_->word_list_.push_back(UTF8ToUTF16(*iter)); |
| 978 return true; | 951 return true; |
| 979 } | 952 } |
| 980 | 953 |
| 981 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { | 954 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { |
| 982 if (word_map_.empty()) | 955 if (private_data_->word_map_.empty()) |
| 983 return; | 956 return; |
| 984 WordMapItem* map_item = cache->mutable_word_map(); | 957 WordMapItem* map_item = cache->mutable_word_map(); |
| 985 map_item->set_item_count(word_map_.size()); | 958 map_item->set_item_count(private_data_->word_map_.size()); |
| 986 for (WordMap::const_iterator iter = word_map_.begin(); | 959 for (WordMap::const_iterator iter = private_data_->word_map_.begin(); |
| 987 iter != word_map_.end(); ++iter) { | 960 iter != private_data_->word_map_.end(); ++iter) { |
| 988 WordMapEntry* map_entry = map_item->add_word_map_entry(); | 961 WordMapEntry* map_entry = map_item->add_word_map_entry(); |
| 989 map_entry->set_word(UTF16ToUTF8(iter->first)); | 962 map_entry->set_word(UTF16ToUTF8(iter->first)); |
| 990 map_entry->set_word_id(iter->second); | 963 map_entry->set_word_id(iter->second); |
| 991 } | 964 } |
| 992 } | 965 } |
| 993 | 966 |
| 994 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { | 967 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { |
| 995 if (!cache.has_word_map()) | 968 if (!cache.has_word_map()) |
| 996 return false; | 969 return false; |
| 997 const WordMapItem& list_item(cache.word_map()); | 970 const WordMapItem& list_item(cache.word_map()); |
| 998 uint32 expected_item_count = list_item.item_count(); | 971 uint32 expected_item_count = list_item.item_count(); |
| 999 uint32 actual_item_count = list_item.word_map_entry_size(); | 972 uint32 actual_item_count = list_item.word_map_entry_size(); |
| 1000 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 973 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
| 1001 return false; | 974 return false; |
| 1002 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); | 975 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); |
| 1003 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); | 976 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); |
| 1004 iter != entries.end(); ++iter) | 977 iter != entries.end(); ++iter) |
| 1005 word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); | 978 private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); |
| 1006 return true; | 979 return true; |
| 1007 } | 980 } |
| 1008 | 981 |
| 1009 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { | 982 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { |
| 1010 if (char_word_map_.empty()) | 983 if (private_data_->char_word_map_.empty()) |
| 1011 return; | 984 return; |
| 1012 CharWordMapItem* map_item = cache->mutable_char_word_map(); | 985 CharWordMapItem* map_item = cache->mutable_char_word_map(); |
| 1013 map_item->set_item_count(char_word_map_.size()); | 986 map_item->set_item_count(private_data_->char_word_map_.size()); |
| 1014 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); | 987 for (CharWordIDMap::const_iterator iter = |
| 1015 iter != char_word_map_.end(); ++iter) { | 988 private_data_->char_word_map_.begin(); |
| 989 iter != private_data_->char_word_map_.end(); ++iter) { | |
| 1016 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); | 990 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); |
| 1017 map_entry->set_char_16(iter->first); | 991 map_entry->set_char_16(iter->first); |
| 1018 const WordIDSet& word_id_set(iter->second); | 992 const WordIDSet& word_id_set(iter->second); |
| 1019 map_entry->set_item_count(word_id_set.size()); | 993 map_entry->set_item_count(word_id_set.size()); |
| 1020 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); | 994 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); |
| 1021 set_iter != word_id_set.end(); ++set_iter) | 995 set_iter != word_id_set.end(); ++set_iter) |
| 1022 map_entry->add_word_id(*set_iter); | 996 map_entry->add_word_id(*set_iter); |
| 1023 } | 997 } |
| 1024 } | 998 } |
| 1025 | 999 |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 1039 expected_item_count = iter->item_count(); | 1013 expected_item_count = iter->item_count(); |
| 1040 actual_item_count = iter->word_id_size(); | 1014 actual_item_count = iter->word_id_size(); |
| 1041 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1015 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
| 1042 return false; | 1016 return false; |
| 1043 char16 uni_char = static_cast<char16>(iter->char_16()); | 1017 char16 uni_char = static_cast<char16>(iter->char_16()); |
| 1044 WordIDSet word_id_set; | 1018 WordIDSet word_id_set; |
| 1045 const RepeatedField<int32>& word_ids(iter->word_id()); | 1019 const RepeatedField<int32>& word_ids(iter->word_id()); |
| 1046 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); | 1020 for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); |
| 1047 jiter != word_ids.end(); ++jiter) | 1021 jiter != word_ids.end(); ++jiter) |
| 1048 word_id_set.insert(*jiter); | 1022 word_id_set.insert(*jiter); |
| 1049 char_word_map_[uni_char] = word_id_set; | 1023 private_data_->char_word_map_[uni_char] = word_id_set; |
| 1050 } | 1024 } |
| 1051 return true; | 1025 return true; |
| 1052 } | 1026 } |
| 1053 | 1027 |
| 1054 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) | 1028 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) |
| 1055 const { | 1029 const { |
| 1056 if (word_id_history_map_.empty()) | 1030 if (private_data_->word_id_history_map_.empty()) |
| 1057 return; | 1031 return; |
| 1058 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); | 1032 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); |
| 1059 map_item->set_item_count(word_id_history_map_.size()); | 1033 map_item->set_item_count(private_data_->word_id_history_map_.size()); |
| 1060 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); | 1034 for (WordIDHistoryMap::const_iterator iter = |
| 1061 iter != word_id_history_map_.end(); ++iter) { | 1035 private_data_->word_id_history_map_.begin(); |
| 1036 iter != private_data_->word_id_history_map_.end(); ++iter) { | |
| 1062 WordIDHistoryMapEntry* map_entry = | 1037 WordIDHistoryMapEntry* map_entry = |
| 1063 map_item->add_word_id_history_map_entry(); | 1038 map_item->add_word_id_history_map_entry(); |
| 1064 map_entry->set_word_id(iter->first); | 1039 map_entry->set_word_id(iter->first); |
| 1065 const HistoryIDSet& history_id_set(iter->second); | 1040 const HistoryIDSet& history_id_set(iter->second); |
| 1066 map_entry->set_item_count(history_id_set.size()); | 1041 map_entry->set_item_count(history_id_set.size()); |
| 1067 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); | 1042 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); |
| 1068 set_iter != history_id_set.end(); ++set_iter) | 1043 set_iter != history_id_set.end(); ++set_iter) |
| 1069 map_entry->add_history_id(*set_iter); | 1044 map_entry->add_history_id(*set_iter); |
| 1070 } | 1045 } |
| 1071 } | 1046 } |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 1084 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = | 1059 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = |
| 1085 entries.begin(); iter != entries.end(); ++iter) { | 1060 entries.begin(); iter != entries.end(); ++iter) { |
| 1086 expected_item_count = iter->item_count(); | 1061 expected_item_count = iter->item_count(); |
| 1087 actual_item_count = iter->history_id_size(); | 1062 actual_item_count = iter->history_id_size(); |
| 1088 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1063 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
| 1089 return false; | 1064 return false; |
| 1090 WordID word_id = iter->word_id(); | 1065 WordID word_id = iter->word_id(); |
| 1091 HistoryIDSet history_id_set; | 1066 HistoryIDSet history_id_set; |
| 1092 const RepeatedField<int64>& history_ids(iter->history_id()); | 1067 const RepeatedField<int64>& history_ids(iter->history_id()); |
| 1093 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); | 1068 for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); |
| 1094 jiter != history_ids.end(); ++jiter) | 1069 jiter != history_ids.end(); ++jiter) { |
| 1095 history_id_set.insert(*jiter); | 1070 history_id_set.insert(*jiter); |
| 1096 word_id_history_map_[word_id] = history_id_set; | 1071 private_data_->AddToHistoryIDWordMap(*jiter, word_id); |
| 1072 } | |
| 1073 private_data_->word_id_history_map_[word_id] = history_id_set; | |
| 1097 } | 1074 } |
| 1098 return true; | 1075 return true; |
| 1099 } | 1076 } |
| 1100 | 1077 |
| 1101 void InMemoryURLIndex::SaveHistoryInfoMap( | 1078 void InMemoryURLIndex::SaveHistoryInfoMap( |
| 1102 InMemoryURLIndexCacheItem* cache) const { | 1079 InMemoryURLIndexCacheItem* cache) const { |
| 1103 if (history_info_map_.empty()) | 1080 if (private_data_->history_info_map_.empty()) |
| 1104 return; | 1081 return; |
| 1105 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 1082 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
| 1106 map_item->set_item_count(history_info_map_.size()); | 1083 map_item->set_item_count(private_data_->history_info_map_.size()); |
| 1107 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1084 for (HistoryInfoMap::const_iterator iter = |
| 1108 iter != history_info_map_.end(); ++iter) { | 1085 private_data_->history_info_map_.begin(); |
| 1086 iter != private_data_->history_info_map_.end(); ++iter) { | |
| 1109 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 1087 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
| 1110 map_entry->set_history_id(iter->first); | 1088 map_entry->set_history_id(iter->first); |
| 1111 const URLRow& url_row(iter->second); | 1089 const URLRow& url_row(iter->second); |
| 1112 // Note: We only save information that contributes to the index so there | 1090 // Note: We only save information that contributes to the index so there |
| 1113 // is no need to save search_term_cache_ (not persistent), | 1091 // is no need to save search_term_cache_ (not persistent), |
| 1114 // languages_, etc. | 1092 // languages_, etc. |
| 1115 map_entry->set_visit_count(url_row.visit_count()); | 1093 map_entry->set_visit_count(url_row.visit_count()); |
| 1116 map_entry->set_typed_count(url_row.typed_count()); | 1094 map_entry->set_typed_count(url_row.typed_count()); |
| 1117 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1095 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
| 1118 map_entry->set_url(url_row.url().spec()); | 1096 map_entry->set_url(url_row.url().spec()); |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 1136 HistoryID history_id = iter->history_id(); | 1114 HistoryID history_id = iter->history_id(); |
| 1137 GURL url(iter->url()); | 1115 GURL url(iter->url()); |
| 1138 URLRow url_row(url, history_id); | 1116 URLRow url_row(url, history_id); |
| 1139 url_row.set_visit_count(iter->visit_count()); | 1117 url_row.set_visit_count(iter->visit_count()); |
| 1140 url_row.set_typed_count(iter->typed_count()); | 1118 url_row.set_typed_count(iter->typed_count()); |
| 1141 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1119 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); |
| 1142 if (iter->has_title()) { | 1120 if (iter->has_title()) { |
| 1143 string16 title(UTF8ToUTF16(iter->title())); | 1121 string16 title(UTF8ToUTF16(iter->title())); |
| 1144 url_row.set_title(title); | 1122 url_row.set_title(title); |
| 1145 } | 1123 } |
| 1146 history_info_map_[history_id] = url_row; | 1124 private_data_->history_info_map_[history_id] = url_row; |
| 1147 } | 1125 } |
| 1148 return true; | 1126 return true; |
| 1149 } | 1127 } |
| 1150 | 1128 |
| 1151 } // namespace history | 1129 } // namespace history |
| OLD | NEW |