Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| 7 #pragma once | 7 #pragma once |
| 8 | 8 |
| 9 #include <functional> | 9 #include <functional> |
| 10 #include <map> | 10 #include <map> |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 85 void ShutDown(); | 85 void ShutDown(); |
| 86 | 86 |
| 87 // Restores the index's private data from the cache file stored in the | 87 // Restores the index's private data from the cache file stored in the |
| 88 // profile directory and returns true if successful. | 88 // profile directory and returns true if successful. |
| 89 bool RestoreFromCacheFile(); | 89 bool RestoreFromCacheFile(); |
| 90 | 90 |
| 91 // Caches the index private data and writes the cache file to the profile | 91 // Caches the index private data and writes the cache file to the profile |
| 92 // directory. | 92 // directory. |
| 93 bool SaveToCacheFile(); | 93 bool SaveToCacheFile(); |
| 94 | 94 |
| 95 // Given a vector containing one or more words as string16s, scans the | 95 // Given a string16 in |term_string|, scans the history index and return a |
|
Peter Kasting
2011/11/21 20:31:02
Nit: return -> returns
mrossetti
2011/11/21 21:38:25
Done.
| |
| 96 // history index and return a vector with all scored, matching history items. | 96 // vector with all scored, matching history items. The |term_string| is |
| 97 // Each term must occur somewhere in the history item's URL or page title for | 97 // broken down into individual terms (words), each of which must occur in the |
| 98 // the item to qualify; however, the terms do not necessarily have to be | 98 // candidate history item's URL or page title for the item to qualify; |
| 99 // adjacent. Results are sorted with higher scoring items first. Each term | 99 // however, the terms do not necessarily have to be adjacent. Once we have |
| 100 // from |terms| may contain punctuation but should not contain spaces. | 100 // a set of candidates, they are filtered to insure that all |term_string| |
| 101 // A search request which results in more than |kItemsToScoreLimit| total | 101 // terms, as separated by whitespace, occur withint the candidate's URL |
|
Peter Kasting
2011/11/21 20:31:02
Nit: withint -> within
mrossetti
2011/11/21 21:38:25
Done.
| |
| 102 // candidate items returns no matches (though the results set will be | 102 // or page title. Scores are then calculated on no more than |
| 103 // retained and used for subsequent calls to this function) as the scoring | 103 // |kItemsToScoreLimit| candidates (as the scoring of such a large number of |
|
Peter Kasting
2011/11/21 20:31:02
Nit: instead of using parens, just add a comma bef
mrossetti
2011/11/21 21:38:25
Done.
| |
| 104 // of such a large number of candidates may cause perceptible typing response | 104 // candidates may cause perceptible typing response delays in the omnibox. |
| 105 // delays in the omnibox. This is likely to occur for short omnibox terms | 105 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
| 106 // such as 'h' and 'w' which will be found in nearly all history candidates. | 106 // will be found in nearly all history candidates.) Results are sorted by |
| 107 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); | 107 // descending score. The full results set (i.e. beyond the |
| 108 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | |
| 109 // to this function. | |
| 110 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); | |
| 108 | 111 |
| 109 // Updates or adds an history item to the index if it meets the minimum | 112 // Updates or adds an history item to the index if it meets the minimum |
| 110 // 'quick' criteria. | 113 // 'quick' criteria. |
| 111 void UpdateURL(URLID row_id, const URLRow& row); | 114 void UpdateURL(URLID row_id, const URLRow& row); |
| 112 | 115 |
| 113 // Deletes indexing data for an history item. The item may not have actually | 116 // Deletes indexing data for an history item. The item may not have actually |
| 114 // been indexed (which is the case if it did not previously meet minimum | 117 // been indexed (which is the case if it did not previously meet minimum |
| 115 // 'quick' criteria). | 118 // 'quick' criteria). |
| 116 void DeleteURL(URLID row_id); | 119 void DeleteURL(URLID row_id); |
| 117 | 120 |
| 118 private: | 121 private: |
| 119 friend class AddHistoryMatch; | 122 friend class AddHistoryMatch; |
| 120 friend class InMemoryURLIndexTest; | 123 friend class InMemoryURLIndexTest; |
| 121 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 124 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
| 122 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); | 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); |
| 123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
| 124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); | 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
| 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); | 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); |
| 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
| 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); | 130 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); |
| 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 131 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
| 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); | 132 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); |
| 130 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 133 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
| 131 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 134 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
| 132 | 135 |
| 133 // Signals that there are no previously cached results for the typed term. | 136 // Signals that there are no previously cached results for the typed term. |
| 134 static const size_t kNoCachedResultForTerm; | 137 static const size_t kNoCachedResultForTerm; |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 179 void operator()(const HistoryID history_id); | 182 void operator()(const HistoryID history_id); |
| 180 | 183 |
| 181 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 184 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
| 182 | 185 |
| 183 private: | 186 private: |
| 184 const InMemoryURLIndex& index_; | 187 const InMemoryURLIndex& index_; |
| 185 ScoredHistoryMatches scored_matches_; | 188 ScoredHistoryMatches scored_matches_; |
| 186 const String16Vector& lower_terms_; | 189 const String16Vector& lower_terms_; |
| 187 }; | 190 }; |
| 188 | 191 |
| 192 // A helper predicate class used to filter excess history items when the | |
| 193 // candidate results set is too large. | |
| 194 class HistoryItemFactorGreater : public std::binary_function<HistoryID, | |
|
Peter Kasting
2011/11/21 20:31:02
Nit: Probably slightly more readable to linebreak
mrossetti
2011/11/21 21:38:25
Done.
| |
| 195 HistoryID, | |
| 196 void> { | |
| 197 public: | |
| 198 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); | |
| 199 ~HistoryItemFactorGreater(); | |
| 200 | |
| 201 bool operator()(const HistoryID h1, const HistoryID h2); | |
| 202 private: | |
|
Peter Kasting
2011/11/21 20:31:02
Nit: Blank line above this
mrossetti
2011/11/21 21:38:25
Done.
| |
| 203 const history::HistoryInfoMap& history_info_map_; | |
| 204 }; | |
| 205 | |
| 189 // Initializes all index data members in preparation for restoring the index | 206 // Initializes all index data members in preparation for restoring the index |
| 190 // from the cache or a complete rebuild from the history database. | 207 // from the cache or a complete rebuild from the history database. |
| 191 void ClearPrivateData(); | 208 void ClearPrivateData(); |
| 192 | 209 |
| 193 // Initializes the whitelist of URL schemes. | 210 // Initializes the whitelist of URL schemes. |
| 194 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 211 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
| 195 | 212 |
| 196 // URL History indexing support functions. | 213 // URL History indexing support functions. |
| 197 | 214 |
| 198 // Indexes one URL history item. | 215 // Indexes one URL history item. |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 216 void UpdateWordHistory(WordID word_id, HistoryID history_id); | 233 void UpdateWordHistory(WordID word_id, HistoryID history_id); |
| 217 | 234 |
| 218 // Creates a new entry in the word/history map for |word_id| and add | 235 // Creates a new entry in the word/history map for |word_id| and add |
| 219 // |history_id| as the initial element of the word's set. | 236 // |history_id| as the initial element of the word's set. |
| 220 void AddWordHistory(const string16& uni_word, HistoryID history_id); | 237 void AddWordHistory(const string16& uni_word, HistoryID history_id); |
| 221 | 238 |
| 222 // Clears |used_| for each item in the search term cache. | 239 // Clears |used_| for each item in the search term cache. |
| 223 void ResetSearchTermCache(); | 240 void ResetSearchTermCache(); |
| 224 | 241 |
| 225 // Composes a set of history item IDs by intersecting the set for each word | 242 // Composes a set of history item IDs by intersecting the set for each word |
| 226 // in |uni_string|. | 243 // in |unsorted_words|. |
| 227 HistoryIDSet HistoryIDSetFromWords(const string16& uni_string); | 244 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
| 228 | 245 |
| 229 // Helper function to HistoryIDSetFromWords which composes a set of history | 246 // Helper function to HistoryIDSetFromWords which composes a set of history |
| 230 // ids for the given term given in |term|. | 247 // ids for the given term given in |term|. |
| 231 HistoryIDSet HistoryIDsForTerm(const string16& term); | 248 HistoryIDSet HistoryIDsForTerm(const string16& term); |
| 232 | 249 |
| 233 // Calculates a raw score for this history item by first determining | 250 // Calculates a raw score for this history item by first determining |
| 234 // if all of the terms in |terms_vector| occur in |row| and, if so, | 251 // if all of the terms in |terms_vector| occur in |row| and, if so, |
| 235 // calculating a raw score based on 1) starting position of each term | 252 // calculating a raw score based on 1) starting position of each term |
| 236 // in the user input, 2) completeness of each term's match, 3) ordering | 253 // in the user input, 2) completeness of each term's match, 3) ordering |
| 237 // of the occurrence of each term (i.e. they appear in order), 4) last | 254 // of the occurrence of each term (i.e. they appear in order), 4) last |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 301 // Only URLs with a whitelisted scheme are indexed. | 318 // Only URLs with a whitelisted scheme are indexed. |
| 302 std::set<std::string> scheme_whitelist_; | 319 std::set<std::string> scheme_whitelist_; |
| 303 | 320 |
| 304 // Set to true at shutdown when the cache has been written to disk. Used | 321 // Set to true at shutdown when the cache has been written to disk. Used |
| 305 // as a temporary safety check to insure that the cache is saved before | 322 // as a temporary safety check to insure that the cache is saved before |
| 306 // the index has been destructed. | 323 // the index has been destructed. |
| 307 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. | 324 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. |
| 308 // http://crbug.com/83659 | 325 // http://crbug.com/83659 |
| 309 bool cached_at_shutdown_; | 326 bool cached_at_shutdown_; |
| 310 | 327 |
| 328 // Used for unit testing only. Records the number of candidate history items | |
| 329 // at three stages in the index searching process. | |
| 330 size_t pre_filter_item_count; // After word index is queried. | |
| 331 size_t post_filter_item_count; // After trimming large result set. | |
| 332 size_t post_scoring_item_count; // After performing final filter and scoring. | |
| 333 | |
| 311 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); | 334 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
| 312 }; | 335 }; |
| 313 | 336 |
| 314 } // namespace history | 337 } // namespace history |
| 315 | 338 |
| 316 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 339 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
| OLD | NEW |