| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 7 #pragma once | 7 #pragma once |
| 8 | 8 |
| 9 #include <functional> | 9 #include <functional> |
| 10 #include <map> | 10 #include <map> |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 83 void ShutDown(); | 83 void ShutDown(); |
| 84 | 84 |
| 85 // Restores the index's private data from the cache file stored in the | 85 // Restores the index's private data from the cache file stored in the |
| 86 // profile directory and returns true if successful. | 86 // profile directory and returns true if successful. |
| 87 bool RestoreFromCacheFile(); | 87 bool RestoreFromCacheFile(); |
| 88 | 88 |
| 89 // Caches the index private data and writes the cache file to the profile | 89 // Caches the index private data and writes the cache file to the profile |
| 90 // directory. | 90 // directory. |
| 91 bool SaveToCacheFile(); | 91 bool SaveToCacheFile(); |
| 92 | 92 |
| 93 // Given a vector containing one or more words as string16s, scans the | 93 // Given a string16 in |term_string|, scans the history index and returns a |
| 94 // history index and return a vector with all scored, matching history items. | 94 // vector with all scored, matching history items. The |term_string| is |
| 95 // Each term must occur somewhere in the history item's URL or page title for | 95 // broken down into individual terms (words), each of which must occur in the |
| 96 // the item to qualify; however, the terms do not necessarily have to be | 96 // candidate history item's URL or page title for the item to qualify; |
| 97 // adjacent. Results are sorted with higher scoring items first. Each term | 97 // however, the terms do not necessarily have to be adjacent. Once we have |
| 98 // from |terms| may contain punctuation but should not contain spaces. | 98 // a set of candidates, they are filtered to insure that all |term_string| |
| 99 // A search request which results in more than |kItemsToScoreLimit| total | 99 // terms, as separated by whitespace, occur within the candidate's URL |
| 100 // candidate items returns no matches (though the results set will be | 100 // or page title. Scores are then calculated on no more than |
| 101 // retained and used for subsequent calls to this function) as the scoring | 101 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of |
| 102 // of such a large number of candidates may cause perceptible typing response | 102 // candidates may cause perceptible typing response delays in the omnibox. |
| 103 // delays in the omnibox. This is likely to occur for short omnibox terms | 103 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
| 104 // such as 'h' and 'w' which will be found in nearly all history candidates. | 104 // will be found in nearly all history candidates. Results are sorted by |
| 105 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); | 105 // descending score. The full results set (i.e. beyond the |
| 106 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
| 107 // to this function. |
| 108 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); |
| 106 | 109 |
| 107 // Updates or adds an history item to the index if it meets the minimum | 110 // Updates or adds an history item to the index if it meets the minimum |
| 108 // 'quick' criteria. | 111 // 'quick' criteria. |
| 109 void UpdateURL(URLID row_id, const URLRow& row); | 112 void UpdateURL(URLID row_id, const URLRow& row); |
| 110 | 113 |
| 111 // Deletes indexing data for an history item. The item may not have actually | 114 // Deletes indexing data for an history item. The item may not have actually |
| 112 // been indexed (which is the case if it did not previously meet minimum | 115 // been indexed (which is the case if it did not previously meet minimum |
| 113 // 'quick' criteria). | 116 // 'quick' criteria). |
| 114 void DeleteURL(URLID row_id); | 117 void DeleteURL(URLID row_id); |
| 115 | 118 |
| 116 private: | 119 private: |
| 117 friend class AddHistoryMatch; | 120 friend class AddHistoryMatch; |
| 118 friend class InMemoryURLIndexTest; | 121 friend class InMemoryURLIndexTest; |
| 119 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 122 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
| 120 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); | 123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); |
| 121 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
| 122 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); | 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
| 123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); | 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); |
| 124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
| 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); | 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); |
| 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
| 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); | 130 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); |
| 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 131 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
| 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 132 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
| 130 | 133 |
| 131 // Signals that there are no previously cached results for the typed term. | 134 // Signals that there are no previously cached results for the typed term. |
| 132 static const size_t kNoCachedResultForTerm; | 135 static const size_t kNoCachedResultForTerm; |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 177 void operator()(const HistoryID history_id); | 180 void operator()(const HistoryID history_id); |
| 178 | 181 |
| 179 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 182 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
| 180 | 183 |
| 181 private: | 184 private: |
| 182 const InMemoryURLIndex& index_; | 185 const InMemoryURLIndex& index_; |
| 183 ScoredHistoryMatches scored_matches_; | 186 ScoredHistoryMatches scored_matches_; |
| 184 const String16Vector& lower_terms_; | 187 const String16Vector& lower_terms_; |
| 185 }; | 188 }; |
| 186 | 189 |
| 190 // A helper predicate class used to filter excess history items when the |
| 191 // candidate results set is too large. |
| 192 class HistoryItemFactorGreater |
| 193 : public std::binary_function<HistoryID, HistoryID, void> { |
| 194 public: |
| 195 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); |
| 196 ~HistoryItemFactorGreater(); |
| 197 |
| 198 bool operator()(const HistoryID h1, const HistoryID h2); |
| 199 |
| 200 private: |
| 201 const history::HistoryInfoMap& history_info_map_; |
| 202 }; |
| 203 |
| 187 // Initializes all index data members in preparation for restoring the index | 204 // Initializes all index data members in preparation for restoring the index |
| 188 // from the cache or a complete rebuild from the history database. | 205 // from the cache or a complete rebuild from the history database. |
| 189 void ClearPrivateData(); | 206 void ClearPrivateData(); |
| 190 | 207 |
| 191 // Initializes the whitelist of URL schemes. | 208 // Initializes the whitelist of URL schemes. |
| 192 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 209 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
| 193 | 210 |
| 194 // URL History indexing support functions. | 211 // URL History indexing support functions. |
| 195 | 212 |
| 196 // Indexes one URL history item. | 213 // Indexes one URL history item. |
| (...skipping 17 matching lines...) Expand all Loading... |
| 214 void UpdateWordHistory(WordID word_id, HistoryID history_id); | 231 void UpdateWordHistory(WordID word_id, HistoryID history_id); |
| 215 | 232 |
| 216 // Creates a new entry in the word/history map for |word_id| and add | 233 // Creates a new entry in the word/history map for |word_id| and add |
| 217 // |history_id| as the initial element of the word's set. | 234 // |history_id| as the initial element of the word's set. |
| 218 void AddWordHistory(const string16& uni_word, HistoryID history_id); | 235 void AddWordHistory(const string16& uni_word, HistoryID history_id); |
| 219 | 236 |
| 220 // Clears |used_| for each item in the search term cache. | 237 // Clears |used_| for each item in the search term cache. |
| 221 void ResetSearchTermCache(); | 238 void ResetSearchTermCache(); |
| 222 | 239 |
| 223 // Composes a set of history item IDs by intersecting the set for each word | 240 // Composes a set of history item IDs by intersecting the set for each word |
| 224 // in |uni_string|. | 241 // in |unsorted_words|. |
| 225 HistoryIDSet HistoryIDSetFromWords(const string16& uni_string); | 242 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
| 226 | 243 |
| 227 // Helper function to HistoryIDSetFromWords which composes a set of history | 244 // Helper function to HistoryIDSetFromWords which composes a set of history |
| 228 // ids for the given term given in |term|. | 245 // ids for the given term given in |term|. |
| 229 HistoryIDSet HistoryIDsForTerm(const string16& term); | 246 HistoryIDSet HistoryIDsForTerm(const string16& term); |
| 230 | 247 |
| 231 // Calculates a raw score for this history item by first determining | 248 // Calculates a raw score for this history item by first determining |
| 232 // if all of the terms in |terms_vector| occur in |row| and, if so, | 249 // if all of the terms in |terms_vector| occur in |row| and, if so, |
| 233 // calculating a raw score based on 1) starting position of each term | 250 // calculating a raw score based on 1) starting position of each term |
| 234 // in the user input, 2) completeness of each term's match, 3) ordering | 251 // in the user input, 2) completeness of each term's match, 3) ordering |
| 235 // of the occurrence of each term (i.e. they appear in order), 4) last | 252 // of the occurrence of each term (i.e. they appear in order), 4) last |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 299 // Only URLs with a whitelisted scheme are indexed. | 316 // Only URLs with a whitelisted scheme are indexed. |
| 300 std::set<std::string> scheme_whitelist_; | 317 std::set<std::string> scheme_whitelist_; |
| 301 | 318 |
| 302 // Set to true at shutdown when the cache has been written to disk. Used | 319 // Set to true at shutdown when the cache has been written to disk. Used |
| 303 // as a temporary safety check to insure that the cache is saved before | 320 // as a temporary safety check to insure that the cache is saved before |
| 304 // the index has been destructed. | 321 // the index has been destructed. |
| 305 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. | 322 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. |
| 306 // http://crbug.com/83659 | 323 // http://crbug.com/83659 |
| 307 bool cached_at_shutdown_; | 324 bool cached_at_shutdown_; |
| 308 | 325 |
| 326 // Used for unit testing only. Records the number of candidate history items |
| 327 // at three stages in the index searching process. |
| 328 size_t pre_filter_item_count; // After word index is queried. |
| 329 size_t post_filter_item_count; // After trimming large result set. |
| 330 size_t post_scoring_item_count; // After performing final filter and scoring. |
| 331 |
| 309 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); | 332 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
| 310 }; | 333 }; |
| 311 | 334 |
| 312 } // namespace history | 335 } // namespace history |
| 313 | 336 |
| 314 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 337 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| OLD | NEW |