OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <map> | 10 #include <map> |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
85 void ShutDown(); | 85 void ShutDown(); |
86 | 86 |
87 // Restores the index's private data from the cache file stored in the | 87 // Restores the index's private data from the cache file stored in the |
88 // profile directory and returns true if successful. | 88 // profile directory and returns true if successful. |
89 bool RestoreFromCacheFile(); | 89 bool RestoreFromCacheFile(); |
90 | 90 |
91 // Caches the index private data and writes the cache file to the profile | 91 // Caches the index private data and writes the cache file to the profile |
92 // directory. | 92 // directory. |
93 bool SaveToCacheFile(); | 93 bool SaveToCacheFile(); |
94 | 94 |
95 // Given a vector containing one or more words as string16s, scans the | 95 // Given a string16 in |term_string|, scans the history index and return a |
Peter Kasting
2011/11/21 20:31:02
Nit: return -> returns
mrossetti
2011/11/21 21:38:25
Done.
| |
96 // history index and return a vector with all scored, matching history items. | 96 // vector with all scored, matching history items. The |term_string| is |
97 // Each term must occur somewhere in the history item's URL or page title for | 97 // broken down into individual terms (words), each of which must occur in the |
98 // the item to qualify; however, the terms do not necessarily have to be | 98 // candidate history item's URL or page title for the item to qualify; |
99 // adjacent. Results are sorted with higher scoring items first. Each term | 99 // however, the terms do not necessarily have to be adjacent. Once we have |
100 // from |terms| may contain punctuation but should not contain spaces. | 100 // a set of candidates, they are filtered to insure that all |term_string| |
101 // A search request which results in more than |kItemsToScoreLimit| total | 101 // terms, as separated by whitespace, occur withint the candidate's URL |
Peter Kasting
2011/11/21 20:31:02
Nit: withint -> within
mrossetti
2011/11/21 21:38:25
Done.
| |
102 // candidate items returns no matches (though the results set will be | 102 // or page title. Scores are then calculated on no more than |
103 // retained and used for subsequent calls to this function) as the scoring | 103 // |kItemsToScoreLimit| candidates (as the scoring of such a large number of |
Peter Kasting
2011/11/21 20:31:02
Nit: instead of using parens, just add a comma bef
mrossetti
2011/11/21 21:38:25
Done.
| |
104 // of such a large number of candidates may cause perceptible typing response | 104 // candidates may cause perceptible typing response delays in the omnibox. |
105 // delays in the omnibox. This is likely to occur for short omnibox terms | 105 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
106 // such as 'h' and 'w' which will be found in nearly all history candidates. | 106 // will be found in nearly all history candidates.) Results are sorted by |
107 ScoredHistoryMatches HistoryItemsForTerms(const String16Vector& terms); | 107 // descending score. The full results set (i.e. beyond the |
108 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | |
109 // to this function. | |
110 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); | |
108 | 111 |
109 // Updates or adds an history item to the index if it meets the minimum | 112 // Updates or adds an history item to the index if it meets the minimum |
110 // 'quick' criteria. | 113 // 'quick' criteria. |
111 void UpdateURL(URLID row_id, const URLRow& row); | 114 void UpdateURL(URLID row_id, const URLRow& row); |
112 | 115 |
113 // Deletes indexing data for an history item. The item may not have actually | 116 // Deletes indexing data for an history item. The item may not have actually |
114 // been indexed (which is the case if it did not previously meet minimum | 117 // been indexed (which is the case if it did not previously meet minimum |
115 // 'quick' criteria). | 118 // 'quick' criteria). |
116 void DeleteURL(URLID row_id); | 119 void DeleteURL(URLID row_id); |
117 | 120 |
118 private: | 121 private: |
119 friend class AddHistoryMatch; | 122 friend class AddHistoryMatch; |
120 friend class InMemoryURLIndexTest; | 123 friend class InMemoryURLIndexTest; |
121 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); | 124 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); |
122 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); | 125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheFilePath); |
123 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
124 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Char16Utilities); | 127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
125 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); | 128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, NonUniqueTermCharacterSets); |
126 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
127 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); | 130 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, StaticFunctions); |
128 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 131 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
129 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); | 132 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleChange); |
130 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 133 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
131 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 134 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
132 | 135 |
133 // Signals that there are no previously cached results for the typed term. | 136 // Signals that there are no previously cached results for the typed term. |
134 static const size_t kNoCachedResultForTerm; | 137 static const size_t kNoCachedResultForTerm; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
179 void operator()(const HistoryID history_id); | 182 void operator()(const HistoryID history_id); |
180 | 183 |
181 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 184 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
182 | 185 |
183 private: | 186 private: |
184 const InMemoryURLIndex& index_; | 187 const InMemoryURLIndex& index_; |
185 ScoredHistoryMatches scored_matches_; | 188 ScoredHistoryMatches scored_matches_; |
186 const String16Vector& lower_terms_; | 189 const String16Vector& lower_terms_; |
187 }; | 190 }; |
188 | 191 |
192 // A helper predicate class used to filter excess history items when the | |
193 // candidate results set is too large. | |
194 class HistoryItemFactorGreater : public std::binary_function<HistoryID, | |
Peter Kasting
2011/11/21 20:31:02
Nit: Probably slightly more readable to linebreak
mrossetti
2011/11/21 21:38:25
Done.
| |
195 HistoryID, | |
196 void> { | |
197 public: | |
198 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); | |
199 ~HistoryItemFactorGreater(); | |
200 | |
201 bool operator()(const HistoryID h1, const HistoryID h2); | |
202 private: | |
Peter Kasting
2011/11/21 20:31:02
Nit: Blank line above this
mrossetti
2011/11/21 21:38:25
Done.
| |
203 const history::HistoryInfoMap& history_info_map_; | |
204 }; | |
205 | |
189 // Initializes all index data members in preparation for restoring the index | 206 // Initializes all index data members in preparation for restoring the index |
190 // from the cache or a complete rebuild from the history database. | 207 // from the cache or a complete rebuild from the history database. |
191 void ClearPrivateData(); | 208 void ClearPrivateData(); |
192 | 209 |
193 // Initializes the whitelist of URL schemes. | 210 // Initializes the whitelist of URL schemes. |
194 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | 211 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); |
195 | 212 |
196 // URL History indexing support functions. | 213 // URL History indexing support functions. |
197 | 214 |
198 // Indexes one URL history item. | 215 // Indexes one URL history item. |
(...skipping 17 matching lines...) Expand all Loading... | |
216 void UpdateWordHistory(WordID word_id, HistoryID history_id); | 233 void UpdateWordHistory(WordID word_id, HistoryID history_id); |
217 | 234 |
218 // Creates a new entry in the word/history map for |word_id| and add | 235 // Creates a new entry in the word/history map for |word_id| and add |
219 // |history_id| as the initial element of the word's set. | 236 // |history_id| as the initial element of the word's set. |
220 void AddWordHistory(const string16& uni_word, HistoryID history_id); | 237 void AddWordHistory(const string16& uni_word, HistoryID history_id); |
221 | 238 |
222 // Clears |used_| for each item in the search term cache. | 239 // Clears |used_| for each item in the search term cache. |
223 void ResetSearchTermCache(); | 240 void ResetSearchTermCache(); |
224 | 241 |
225 // Composes a set of history item IDs by intersecting the set for each word | 242 // Composes a set of history item IDs by intersecting the set for each word |
226 // in |uni_string|. | 243 // in |unsorted_words|. |
227 HistoryIDSet HistoryIDSetFromWords(const string16& uni_string); | 244 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
228 | 245 |
229 // Helper function to HistoryIDSetFromWords which composes a set of history | 246 // Helper function to HistoryIDSetFromWords which composes a set of history |
230 // ids for the given term given in |term|. | 247 // ids for the given term given in |term|. |
231 HistoryIDSet HistoryIDsForTerm(const string16& term); | 248 HistoryIDSet HistoryIDsForTerm(const string16& term); |
232 | 249 |
233 // Calculates a raw score for this history item by first determining | 250 // Calculates a raw score for this history item by first determining |
234 // if all of the terms in |terms_vector| occur in |row| and, if so, | 251 // if all of the terms in |terms_vector| occur in |row| and, if so, |
235 // calculating a raw score based on 1) starting position of each term | 252 // calculating a raw score based on 1) starting position of each term |
236 // in the user input, 2) completeness of each term's match, 3) ordering | 253 // in the user input, 2) completeness of each term's match, 3) ordering |
237 // of the occurrence of each term (i.e. they appear in order), 4) last | 254 // of the occurrence of each term (i.e. they appear in order), 4) last |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
301 // Only URLs with a whitelisted scheme are indexed. | 318 // Only URLs with a whitelisted scheme are indexed. |
302 std::set<std::string> scheme_whitelist_; | 319 std::set<std::string> scheme_whitelist_; |
303 | 320 |
304 // Set to true at shutdown when the cache has been written to disk. Used | 321 // Set to true at shutdown when the cache has been written to disk. Used |
305 // as a temporary safety check to insure that the cache is saved before | 322 // as a temporary safety check to insure that the cache is saved before |
306 // the index has been destructed. | 323 // the index has been destructed. |
307 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. | 324 // TODO(mrossetti): Eliminate once the transition to SQLite has been done. |
308 // http://crbug.com/83659 | 325 // http://crbug.com/83659 |
309 bool cached_at_shutdown_; | 326 bool cached_at_shutdown_; |
310 | 327 |
328 // Used for unit testing only. Records the number of candidate history items | |
329 // at three stages in the index searching process. | |
330 size_t pre_filter_item_count; // After word index is queried. | |
331 size_t post_filter_item_count; // After trimming large result set. | |
332 size_t post_scoring_item_count; // After performing final filter and scoring. | |
333 | |
311 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); | 334 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); |
312 }; | 335 }; |
313 | 336 |
314 } // namespace history | 337 } // namespace history |
315 | 338 |
316 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ | 339 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ |
OLD | NEW |