| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
| 6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
| 7 | 7 |
| 8 #include <stddef.h> | 8 #include <stddef.h> |
| 9 | 9 |
| 10 #include <set> | 10 #include <set> |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 59 // set). Once we have a set of candidates, they are filtered to ensure | 59 // set). Once we have a set of candidates, they are filtered to ensure |
| 60 // that all |term_string| terms, as separated by whitespace and the | 60 // that all |term_string| terms, as separated by whitespace and the |
| 61 // cursor (if set), occur within the candidate's URL or page title. | 61 // cursor (if set), occur within the candidate's URL or page title. |
| 62 // Scores are then calculated on no more than |kItemsToScoreLimit| | 62 // Scores are then calculated on no more than |kItemsToScoreLimit| |
| 63 // candidates, as the scoring of such a large number of candidates may | 63 // candidates, as the scoring of such a large number of candidates may |
| 64 // cause perceptible typing response delays in the omnibox. This is | 64 // cause perceptible typing response delays in the omnibox. This is |
| 65 // likely to occur for short omnibox terms such as 'h' and 'w' which | 65 // likely to occur for short omnibox terms such as 'h' and 'w' which |
| 66 // will be found in nearly all history candidates. Results are sorted by | 66 // will be found in nearly all history candidates. Results are sorted by |
| 67 // descending score. The full results set (i.e. beyond the | 67 // descending score. The full results set (i.e. beyond the |
| 68 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | 68 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
| 69 // to this function. |languages| is used to help parse/format the URLs in the | 69 // to this function. In total, |max_matches| of items will be returned in the |
| 70 // history index. In total, |max_matches| of items will be returned in the | |
| 71 // |ScoredHistoryMatches| vector. | 70 // |ScoredHistoryMatches| vector. |
| 72 ScoredHistoryMatches HistoryItemsForTerms( | 71 ScoredHistoryMatches HistoryItemsForTerms( |
| 73 base::string16 term_string, | 72 base::string16 term_string, |
| 74 size_t cursor_position, | 73 size_t cursor_position, |
| 75 size_t max_matches, | 74 size_t max_matches, |
| 76 const std::string& languages, | |
| 77 bookmarks::BookmarkModel* bookmark_model, | 75 bookmarks::BookmarkModel* bookmark_model, |
| 78 TemplateURLService* template_url_service); | 76 TemplateURLService* template_url_service); |
| 79 | 77 |
| 80 // Adds the history item in |row| to the index if it does not already | 78 // Adds the history item in |row| to the index if it does not already |
| 81 // exist and it meets the minimum 'quick' criteria. If the row already exists | 79 // exist and it meets the minimum 'quick' criteria. If the row already exists |
| 82 // in the index then the index will be updated if the row still meets the | 80 // in the index then the index will be updated if the row still meets the |
| 83 // criteria, otherwise the row will be removed from the index. Returns true | 81 // criteria, otherwise the row will be removed from the index. Returns true |
| 84 // if the index was actually updated. |languages| gives a list of language | 82 // if the index was actually updated. |scheme_whitelist| is used to filter |
| 85 // encodings by which the URLs and page titles are broken down into words and | 83 // non-qualifying schemes. |history_service| is used to schedule an update to |
| 86 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. | 84 // the recent visits component of this URL's entry in the index. |
| 87 // |history_service| is used to schedule an update to the recent visits | |
| 88 // component of this URL's entry in the index. | |
| 89 bool UpdateURL(history::HistoryService* history_service, | 85 bool UpdateURL(history::HistoryService* history_service, |
| 90 const history::URLRow& row, | 86 const history::URLRow& row, |
| 91 const std::string& languages, | |
| 92 const std::set<std::string>& scheme_whitelist, | 87 const std::set<std::string>& scheme_whitelist, |
| 93 base::CancelableTaskTracker* tracker); | 88 base::CancelableTaskTracker* tracker); |
| 94 | 89 |
| 95 // Updates the entry for |url_id| in the index, replacing its | 90 // Updates the entry for |url_id| in the index, replacing its |
| 96 // recent visits information with |recent_visits|. If |url_id| | 91 // recent visits information with |recent_visits|. If |url_id| |
| 97 // is not in the index, does nothing. | 92 // is not in the index, does nothing. |
| 98 void UpdateRecentVisits(history::URLID url_id, | 93 void UpdateRecentVisits(history::URLID url_id, |
| 99 const history::VisitVector& recent_visits); | 94 const history::VisitVector& recent_visits); |
| 100 | 95 |
| 101 // Using |history_service| schedules an update (using the historyDB | 96 // Using |history_service| schedules an update (using the historyDB |
| 102 // thread) for the recent visits information for |url_id|. Unless | 97 // thread) for the recent visits information for |url_id|. Unless |
| 103 // something unexpectedly goes wrong, UpdateRecentVisits() should | 98 // something unexpectedly goes wrong, UpdateRecentVisits() should |
| 104 // eventually be called from a callback. | 99 // eventually be called from a callback. |
| 105 void ScheduleUpdateRecentVisits(history::HistoryService* history_service, | 100 void ScheduleUpdateRecentVisits(history::HistoryService* history_service, |
| 106 history::URLID url_id, | 101 history::URLID url_id, |
| 107 base::CancelableTaskTracker* tracker); | 102 base::CancelableTaskTracker* tracker); |
| 108 | 103 |
| 109 // Deletes index data for the history item with the given |url|. | 104 // Deletes index data for the history item with the given |url|. |
| 110 // The item may not have actually been indexed, which is the case if it did | 105 // The item may not have actually been indexed, which is the case if it did |
| 111 // not previously meet minimum 'quick' criteria. Returns true if the index | 106 // not previously meet minimum 'quick' criteria. Returns true if the index |
| 112 // was actually updated. | 107 // was actually updated. |
| 113 bool DeleteURL(const GURL& url); | 108 bool DeleteURL(const GURL& url); |
| 114 | 109 |
| 115 // Constructs a new object by restoring its contents from the cache file | 110 // Constructs a new object by restoring its contents from the cache file |
| 116 // at |path|. Returns the new URLIndexPrivateData which on success will | 111 // at |path|. Returns the new URLIndexPrivateData which on success will |
| 117 // contain the restored data but upon failure will be empty. |languages| | 112 // contain the restored data but upon failure will be empty. |
| 118 // is used to break URLs and page titles into words. This function | 113 // This function should be run on the file thread. |
| 119 // should be run on the file thread. | |
| 120 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( | 114 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( |
| 121 const base::FilePath& path, | 115 const base::FilePath& path); |
| 122 const std::string& languages); | |
| 123 | 116 |
| 124 // Constructs a new object by rebuilding its contents from the history | 117 // Constructs a new object by rebuilding its contents from the history |
| 125 // database in |history_db|. Returns the new URLIndexPrivateData which on | 118 // database in |history_db|. Returns the new URLIndexPrivateData which on |
| 126 // success will contain the rebuilt data but upon failure will be empty. | 119 // success will contain the rebuilt data but upon failure will be empty. |
| 127 // |languages| gives a list of language encodings by which the URLs and page | |
| 128 // titles are broken down into words and characters. | |
| 129 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( | 120 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |
| 130 history::HistoryDatabase* history_db, | 121 history::HistoryDatabase* history_db, |
| 131 const std::string& languages, | |
| 132 const std::set<std::string>& scheme_whitelist); | 122 const std::set<std::string>& scheme_whitelist); |
| 133 | 123 |
| 134 // Writes |private_data| as a cache file to |file_path| and returns success. | 124 // Writes |private_data| as a cache file to |file_path| and returns success. |
| 135 static bool WritePrivateDataToCacheFileTask( | 125 static bool WritePrivateDataToCacheFileTask( |
| 136 scoped_refptr<URLIndexPrivateData> private_data, | 126 scoped_refptr<URLIndexPrivateData> private_data, |
| 137 const base::FilePath& file_path); | 127 const base::FilePath& file_path); |
| 138 | 128 |
| 139 // Creates a copy of ourself. | 129 // Creates a copy of ourself. |
| 140 scoped_refptr<URLIndexPrivateData> Duplicate() const; | 130 scoped_refptr<URLIndexPrivateData> Duplicate() const; |
| 141 | 131 |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 196 }; | 186 }; |
| 197 typedef std::map<base::string16, SearchTermCacheItem> SearchTermCacheMap; | 187 typedef std::map<base::string16, SearchTermCacheItem> SearchTermCacheMap; |
| 198 | 188 |
| 199 // A helper class which performs the final filter on each candidate | 189 // A helper class which performs the final filter on each candidate |
| 200 // history URL match, inserting accepted matches into |scored_matches_|. | 190 // history URL match, inserting accepted matches into |scored_matches_|. |
| 201 class AddHistoryMatch { | 191 class AddHistoryMatch { |
| 202 public: | 192 public: |
| 203 AddHistoryMatch(bookmarks::BookmarkModel* bookmark_model, | 193 AddHistoryMatch(bookmarks::BookmarkModel* bookmark_model, |
| 204 TemplateURLService* template_url_service, | 194 TemplateURLService* template_url_service, |
| 205 const URLIndexPrivateData& private_data, | 195 const URLIndexPrivateData& private_data, |
| 206 const std::string& languages, | |
| 207 const base::string16& lower_string, | 196 const base::string16& lower_string, |
| 208 const String16Vector& lower_terms, | 197 const String16Vector& lower_terms, |
| 209 const base::Time now); | 198 const base::Time now); |
| 210 AddHistoryMatch(const AddHistoryMatch& other); | 199 AddHistoryMatch(const AddHistoryMatch& other); |
| 211 ~AddHistoryMatch(); | 200 ~AddHistoryMatch(); |
| 212 | 201 |
| 213 void operator()(const HistoryID history_id); | 202 void operator()(const HistoryID history_id); |
| 214 | 203 |
| 215 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 204 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
| 216 | 205 |
| 217 private: | 206 private: |
| 218 friend class InMemoryURLIndexTest; | 207 friend class InMemoryURLIndexTest; |
| 219 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, AddHistoryMatch); | 208 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, AddHistoryMatch); |
| 220 bookmarks::BookmarkModel* bookmark_model_; | 209 bookmarks::BookmarkModel* bookmark_model_; |
| 221 TemplateURLService* template_url_service_; | 210 TemplateURLService* template_url_service_; |
| 222 const URLIndexPrivateData& private_data_; | 211 const URLIndexPrivateData& private_data_; |
| 223 const std::string& languages_; | |
| 224 ScoredHistoryMatches scored_matches_; | 212 ScoredHistoryMatches scored_matches_; |
| 225 const base::string16& lower_string_; | 213 const base::string16& lower_string_; |
| 226 const String16Vector& lower_terms_; | 214 const String16Vector& lower_terms_; |
| 227 WordStarts lower_terms_to_word_starts_offsets_; | 215 WordStarts lower_terms_to_word_starts_offsets_; |
| 228 const base::Time now_; | 216 const base::Time now_; |
| 229 }; | 217 }; |
| 230 | 218 |
| 231 // A helper predicate class used to filter excess history items when the | 219 // A helper predicate class used to filter excess history items when the |
| 232 // candidate results set is too large. | 220 // candidate results set is too large. |
| 233 class HistoryItemFactorGreater { | 221 class HistoryItemFactorGreater { |
| (...skipping 14 matching lines...) Expand all Loading... |
| 248 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); | 236 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
| 249 | 237 |
| 250 // Helper function to HistoryIDSetFromWords which composes a set of history | 238 // Helper function to HistoryIDSetFromWords which composes a set of history |
| 251 // ids for the term given in |term|. | 239 // ids for the term given in |term|. |
| 252 HistoryIDSet HistoryIDsForTerm(const base::string16& term); | 240 HistoryIDSet HistoryIDsForTerm(const base::string16& term); |
| 253 | 241 |
| 254 // Given a set of Char16s, finds words containing those characters. | 242 // Given a set of Char16s, finds words containing those characters. |
| 255 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 243 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
| 256 | 244 |
| 257 // Indexes one URL history item as described by |row|. Returns true if the | 245 // Indexes one URL history item as described by |row|. Returns true if the |
| 258 // row was actually indexed. |languages| gives a list of language encodings by | 246 // row was actually indexed. |scheme_whitelist| is used to filter |
| 259 // which the URLs and page titles are broken down into words and characters. | 247 // non-qualifying schemes. If |history_db| is not NULL then this function |
| 260 // |scheme_whitelist| is used to filter non-qualifying schemes. If | 248 // uses the history database synchronously to get the URL's recent visits |
| 261 // |history_db| is not NULL then this function uses the history database | 249 // information. This mode should only be used on the historyDB thread. |
| 262 // synchronously to get the URL's recent visits information. This mode should | 250 // If |history_db| is NULL, then this function uses |history_service| to |
| 263 // only be used on the historyDB thread. If |history_db| is NULL, then | 251 // schedule a task on the historyDB thread to fetch and update the recent |
| 264 // this function uses |history_service| to schedule a task on the | 252 // visits information. |
| 265 // historyDB thread to fetch and update the recent visits | |
| 266 // information. | |
| 267 bool IndexRow(history::HistoryDatabase* history_db, | 253 bool IndexRow(history::HistoryDatabase* history_db, |
| 268 history::HistoryService* history_service, | 254 history::HistoryService* history_service, |
| 269 const history::URLRow& row, | 255 const history::URLRow& row, |
| 270 const std::string& languages, | |
| 271 const std::set<std::string>& scheme_whitelist, | 256 const std::set<std::string>& scheme_whitelist, |
| 272 base::CancelableTaskTracker* tracker); | 257 base::CancelableTaskTracker* tracker); |
| 273 | 258 |
| 274 // Parses and indexes the words in the URL and page title of |row| and | 259 // Parses and indexes the words in the URL and page title of |row| and |
| 275 // calculates the word starts in each, saving the starts in |word_starts|. | 260 // calculates the word starts in each, saving the starts in |word_starts|. |
| 276 // |languages| gives a list of language encodings by which the URLs and page | |
| 277 // titles are broken down into words and characters. | |
| 278 void AddRowWordsToIndex(const history::URLRow& row, | 261 void AddRowWordsToIndex(const history::URLRow& row, |
| 279 RowWordStarts* word_starts, | 262 RowWordStarts* word_starts); |
| 280 const std::string& languages); | |
| 281 | 263 |
| 282 // Given a single word in |uni_word|, adds a reference for the containing | 264 // Given a single word in |uni_word|, adds a reference for the containing |
| 283 // history item identified by |history_id| to the index. | 265 // history item identified by |history_id| to the index. |
| 284 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); | 266 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); |
| 285 | 267 |
| 286 // Creates a new entry in the word/history map for |word_id| and adds | 268 // Creates a new entry in the word/history map for |word_id| and adds |
| 287 // |history_id| as the initial element of the word's set. | 269 // |history_id| as the initial element of the word's set. |
| 288 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); | 270 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); |
| 289 | 271 |
| 290 // Updates an existing entry in the word/history index by adding the | 272 // Updates an existing entry in the word/history index by adding the |
| (...skipping 26 matching lines...) Expand all Loading... |
| 317 void SaveCharWordMap( | 299 void SaveCharWordMap( |
| 318 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 300 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
| 319 void SaveWordIDHistoryMap( | 301 void SaveWordIDHistoryMap( |
| 320 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 302 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
| 321 void SaveHistoryInfoMap( | 303 void SaveHistoryInfoMap( |
| 322 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 304 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
| 323 void SaveWordStartsMap( | 305 void SaveWordStartsMap( |
| 324 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 306 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
| 325 | 307 |
| 326 // Decode a data structure from the protobuf |cache|. Return false if there | 308 // Decode a data structure from the protobuf |cache|. Return false if there |
| 327 // is any kind of failure. |languages| will be used to break URLs and page | 309 // is any kind of failure. |
| 328 // titles into words | |
| 329 bool RestorePrivateData( | 310 bool RestorePrivateData( |
| 330 const in_memory_url_index::InMemoryURLIndexCacheItem& cache, | 311 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 331 const std::string& languages); | |
| 332 bool RestoreWordList( | 312 bool RestoreWordList( |
| 333 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 313 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 334 bool RestoreWordMap( | 314 bool RestoreWordMap( |
| 335 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 315 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 336 bool RestoreCharWordMap( | 316 bool RestoreCharWordMap( |
| 337 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 317 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 338 bool RestoreWordIDHistoryMap( | 318 bool RestoreWordIDHistoryMap( |
| 339 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 319 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 340 bool RestoreHistoryInfoMap( | 320 bool RestoreHistoryInfoMap( |
| 341 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 321 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 342 bool RestoreWordStartsMap( | 322 bool RestoreWordStartsMap( |
| 343 const in_memory_url_index::InMemoryURLIndexCacheItem& cache, | 323 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 344 const std::string& languages); | |
| 345 | 324 |
| 346 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 325 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
| 347 static bool URLSchemeIsWhitelisted(const GURL& gurl, | 326 static bool URLSchemeIsWhitelisted(const GURL& gurl, |
| 348 const std::set<std::string>& whitelist); | 327 const std::set<std::string>& whitelist); |
| 349 | 328 |
| 350 // Cache of search terms. | 329 // Cache of search terms. |
| 351 SearchTermCacheMap search_term_cache_; | 330 SearchTermCacheMap search_term_cache_; |
| 352 | 331 |
| 353 // Start of data members that are cached ------------------------------------- | 332 // Start of data members that are cached ------------------------------------- |
| 354 | 333 |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 408 int saved_cache_version_; | 387 int saved_cache_version_; |
| 409 | 388 |
| 410 // Used for unit testing only. Records the number of candidate history items | 389 // Used for unit testing only. Records the number of candidate history items |
| 411 // at three stages in the index searching process. | 390 // at three stages in the index searching process. |
| 412 size_t pre_filter_item_count_; // After word index is queried. | 391 size_t pre_filter_item_count_; // After word index is queried. |
| 413 size_t post_filter_item_count_; // After trimming large result set. | 392 size_t post_filter_item_count_; // After trimming large result set. |
| 414 size_t post_scoring_item_count_; // After performing final filter/scoring. | 393 size_t post_scoring_item_count_; // After performing final filter/scoring. |
| 415 }; | 394 }; |
| 416 | 395 |
| 417 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 396 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
| OLD | NEW |