OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <map> | 10 #include <map> |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
83 void ShutDown(); | 83 void ShutDown(); |
84 | 84 |
85 // Restores the index's private data from the cache file stored in the | 85 // Restores the index's private data from the cache file stored in the |
86 // profile directory and returns true if successful. | 86 // profile directory and returns true if successful. |
87 bool RestoreFromCacheFile(); | 87 bool RestoreFromCacheFile(); |
88 | 88 |
89 // Caches the index private data and writes the cache file to the profile | 89 // Caches the index private data and writes the cache file to the profile |
90 // directory. | 90 // directory. |
91 bool SaveToCacheFile(); | 91 bool SaveToCacheFile(); |
92 | 92 |
93 // Given a vector containing one or more words as string16s, scans the | 93 // Given a string16 in |term_string|, scans the history index and returns a |
94 // history index and return a vector with all scored, matching history items. | 94 // vector with all scored, matching history items. The |term_string| is |
95 // Each term must occur somewhere in the history item's URL or page title for | 95 // broken down into individual terms (words), each of which must occur in the |
96 // the item to qualify; however, the terms do not necessarily have to be | 96 // candidate history item's URL or page title for the item to qualify; |
97 // adjacent. Results are sorted with higher scoring items first. Each term | 97 // however, the terms do not necessarily have to be adjacent. Once we have |
98 // from |terms| may contain punctuation but should not contain spaces. | 98 // a set of candidates, they are filtered to insure that all |term_string| |
99 // A search request which results in more than |kItemsToScoreLimit| total | 99 // terms, as separated by whitespace, occur within the candidate's URL |
100 // candidate items returns no matches (though the results set will be | 100 // or page title. Scores are then calculated on no more than |
101 // retained and used for subsequent calls to this function) as the scoring | 101 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of |
102 // of such a large number of candidates may cause perceptible typing response | 102 // candidates may cause perceptible typing response delays in the omnibox. |
103 // delays in the omnibox. This is likely to occur for short omnibox terms | 103 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
104 // such as 'h' and 'w' which will be found in nearly all history candidates. | 104 // will be found in nearly all history candidates. Results are sorted by |
105 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); | 105 // descending score. The full results set (i.e. beyond the |
| 106 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
| 107 // to this function. |
| 108 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); |
106 | 109 |
107 // Updates or adds an history item to the index if it meets the minimum | 110 // Updates or adds an history item to the index if it meets the minimum |
108 // 'quick' criteria. | 111 // 'quick' criteria. |
109 void UpdateURL(URLID row_id, const URLRow& row); | 112 void UpdateURL(URLID row_id, const URLRow& row); |
110 | 113 |
111 // Deletes indexing data for an history item. The item may not have actually | 114 // Deletes indexing data for an history item. The item may not have actually |
112 // been indexed (which is the case if it did not previously meet minimum | 115 // been indexed (which is the case if it did not previously meet minimum |
113 // 'quick' criteria). | 116 // 'quick' criteria). |
114 void DeleteURL(URLID row_id); | 117 void DeleteURL(URLID row_id); |
115 | 118 |
116 private: | 119 private: |
117 friend class AddHistoryMatch; | 120 friend class AddHistoryMatch; |
118 friend class InMemoryURLIndexTest; | 121 friend class InMemoryURLIndexTest; |
119 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 122 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
120 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); | 123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); |
121 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
122 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); | 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); | 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); |
124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); | 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); |
126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); | 130 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); |
128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 131 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 132 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
130 | 133 |
131 // Signals that there are no previously cached results for the typed term. | 134 // Signals that there are no previously cached results for the typed term. |
132 static const size_t kNoCachedResultForTerm; | 135 static const size_t kNoCachedResultForTerm; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 void operator()(const HistoryID history_id); | 180 void operator()(const HistoryID history_id); |
178 | 181 |
179 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 182 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
180 | 183 |
181 private: | 184 private: |
182 const InMemoryURLIndex& index_; | 185 const InMemoryURLIndex& index_; |
183 ScoredHistoryMatches scored_matches_; | 186 ScoredHistoryMatches scored_matches_; |
184 const String16Vector& lower_terms_; | 187 const String16Vector& lower_terms_; |
185 }; | 188 }; |
186 | 189 |
| 190 // A helper predicate class used to filter excess history items when the |
| 191 // candidate results set is too large. |
| 192 class HistoryItemFactorGreater |
| 193 : public std::binary_function<HistoryID, HistoryID, void> { |
| 194 public: |
| 195 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); |
| 196 ~HistoryItemFactorGreater(); |
| 197 |
| 198 bool operator()(const HistoryID h1, const HistoryID h2); |
| 199 |
| 200 private: |
| 201 const history::HistoryInfoMap& history_info_map_; |
| 202 }; |
| 203 |
187 // Initializes all index data members in preparation for restoring the index | 204 // Initializes all index data members in preparation for restoring the index |
188 // from the cache or a complete rebuild from the history database. | 205 // from the cache or a complete rebuild from the history database. |
189 void ClearPrivateData(); | 206 void ClearPrivateData(); |
190 | 207 |
191 // Initializes the whitelist of URL schemes. | 208 // Initializes the whitelist of URL schemes. |
192 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 209 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
193 | 210 |
194 // URL History indexing support functions. | 211 // URL History indexing support functions. |
195 | 212 |
196 // Indexes one URL history item. | 213 // Indexes one URL history item. |
(...skipping 17 matching lines...) Expand all Loading... |
214 void UpdateWordHistory(WordID word_id, HistoryID history_id); | 231 void UpdateWordHistory(WordID word_id, HistoryID history_id); |
215 | 232 |
216 // Creates a new entry in the word/history map for |word_id| and add | 233 // Creates a new entry in the word/history map for |word_id| and add |
217 // |history_id| as the initial element of the word's set. | 234 // |history_id| as the initial element of the word's set. |
218 void AddWordHistory(const string16& uni_word, HistoryID history_id); | 235 void AddWordHistory(const string16& uni_word, HistoryID history_id); |
219 | 236 |
220 // Clears |used_| for each item in the search term cache. | 237 // Clears |used_| for each item in the search term cache. |
221 void ResetSearchTermCache(); | 238 void ResetSearchTermCache(); |
222 | 239 |
223 // Composes a set of history item IDs by intersecting the set for each word | 240 // Composes a set of history item IDs by intersecting the set for each word |
224 // in |uni_string|. | 241 // in |unsorted_words|. |
225 HistoryIDSet HistoryIDSetFromWords(const string16& uni_string); | 242 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
226 | 243 |
227 // Helper function to HistoryIDSetFromWords which composes a set of history | 244 // Helper function to HistoryIDSetFromWords which composes a set of history |
228 // ids for the given term given in |term|. | 245 // ids for the given term given in |term|. |
229 HistoryIDSet HistoryIDsForTerm(const string16& term); | 246 HistoryIDSet HistoryIDsForTerm(const string16& term); |
230 | 247 |
231 // Calculates a raw score for this history item by first determining | 248 // Calculates a raw score for this history item by first determining |
232 // if all of the terms in |terms_vector| occur in |row| and, if so, | 249 // if all of the terms in |terms_vector| occur in |row| and, if so, |
233 // calculating a raw score based on 1) starting position of each term | 250 // calculating a raw score based on 1) starting position of each term |
234 // in the user input, 2) completeness of each term's match, 3) ordering | 251 // in the user input, 2) completeness of each term's match, 3) ordering |
235 // of the occurrence of each term (i.e. they appear in order), 4) last | 252 // of the occurrence of each term (i.e. they appear in order), 4) last |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
299 // Only URLs with a whitelisted scheme are indexed. | 316 // Only URLs with a whitelisted scheme are indexed. |
300 std::set<std::string> scheme_whitelist_; | 317 std::set<std::string> scheme_whitelist_; |
301 | 318 |
302 // Set to true at shutdown when the cache has been written to disk. Used | 319 // Set to true at shutdown when the cache has been written to disk. Used |
303 // as a temporary safety check to insure that the cache is saved before | 320 // as a temporary safety check to insure that the cache is saved before |
304 // the index has been destructed. | 321 // the index has been destructed. |
305 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. | 322 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. |
306 // http://crbug.com/83659 | 323 // http://crbug.com/83659 |
307 bool cached_at_shutdown_; | 324 bool cached_at_shutdown_; |
308 | 325 |
| 326 // Used for unit testing only. Records the number of candidate history items |
| 327 // at three stages in the index searching process. |
| 328 size_t pre_filter_item_count; // After word index is queried. |
| 329 size_t post_filter_item_count; // After trimming large result set. |
| 330 size_t post_scoring_item_count; // After performing final filter and scoring. |
| 331 |
309 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); | 332 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
310 }; | 333 }; |
311 | 334 |
312 } // namespace history | 335 } // namespace history |
313 | 336 |
314 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 337 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
OLD | NEW |