OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
10 #include "base/gtest_prod_util.h" | 10 #include "base/gtest_prod_util.h" |
| 11 #include "base/memory/ref_counted.h" |
11 #include "chrome/browser/history/in_memory_url_index_types.h" | 12 #include "chrome/browser/history/in_memory_url_index_types.h" |
12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 13 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 14 #include "content/public/browser/notification_details.h" |
13 | 15 |
14 class HistoryQuickProviderTest; | 16 class HistoryQuickProviderTest; |
15 | 17 |
16 namespace in_memory_url_index { | 18 namespace in_memory_url_index { |
17 class InMemoryURLIndexCacheItem; | 19 class InMemoryURLIndexCacheItem; |
18 } | 20 } |
19 | 21 |
20 namespace history { | 22 namespace history { |
21 | 23 |
22 namespace imui = in_memory_url_index; | 24 namespace imui = in_memory_url_index; |
23 | 25 |
24 class HistoryDatabase; | 26 class HistoryDatabase; |
| 27 class InMemoryURLIndex; |
| 28 class RefCountedBool; |
25 | 29 |
26 // Current version of the cache file. | 30 // Current version of the cache file. |
27 static const int kCurrentCacheFileVersion = 1; | 31 static const int kCurrentCacheFileVersion = 1; |
28 | 32 |
29 // A structure describing the InMemoryURLIndex's internal data and providing for | 33 // A structure describing the InMemoryURLIndex's internal data and providing for |
30 // restoring, rebuilding and updating that internal data. | 34 // restoring, rebuilding and updating that internal data. |
31 class URLIndexPrivateData { | 35 class URLIndexPrivateData |
| 36 : public base::RefCountedThreadSafe<URLIndexPrivateData> { |
32 public: | 37 public: |
33 URLIndexPrivateData(); | 38 URLIndexPrivateData(); |
| 39 |
| 40 private: |
| 41 friend class base::RefCountedThreadSafe<URLIndexPrivateData>; |
34 ~URLIndexPrivateData(); | 42 ~URLIndexPrivateData(); |
35 | 43 |
36 private: | |
37 friend class AddHistoryMatch; | 44 friend class AddHistoryMatch; |
38 friend class ::HistoryQuickProviderTest; | 45 friend class ::HistoryQuickProviderTest; |
39 friend class InMemoryURLIndex; | 46 friend class InMemoryURLIndex; |
40 friend class InMemoryURLIndexTest; | 47 friend class InMemoryURLIndexTest; |
41 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 48 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
42 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); | 49 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
43 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 50 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
44 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 51 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
45 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 52 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
46 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 53 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
122 // or page title. Scores are then calculated on no more than | 129 // or page title. Scores are then calculated on no more than |
123 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of | 130 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of |
124 // candidates may cause perceptible typing response delays in the omnibox. | 131 // candidates may cause perceptible typing response delays in the omnibox. |
125 // This is likely to occur for short omnibox terms such as 'h' and 'w' which | 132 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
126 // will be found in nearly all history candidates. Results are sorted by | 133 // will be found in nearly all history candidates. Results are sorted by |
127 // descending score. The full results set (i.e. beyond the | 134 // descending score. The full results set (i.e. beyond the |
128 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | 135 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
129 // to this function. | 136 // to this function. |
130 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); | 137 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); |
131 | 138 |
132 // Sets the |languages| to a list of language encodings with which the history | 139 // Creates a new URLIndexPrivateData object, populates it from the contents |
133 // URLs and omnibox searches are interpreted, i.e. how each is broken | 140 // of the cache file stored in |file_path|, and assigns it to |private_data|. |
134 // down into words and each word is broken down into characters. | 141 // |languages| will be used to break URLs and page titles into words and is |
135 void set_languages(const std::string& languages) { languages_ = languages; } | 142 // deliberately passed by value. |
| 143 static void RestoreFromFileTask( |
| 144 const FilePath& file_path, |
| 145 scoped_refptr<URLIndexPrivateData> private_data, |
| 146 std::string languages); |
136 | 147 |
137 // Restores the index's private data from the cache file stored in the | 148 // Constructs a new object by restoring its contents from the file at |path|. |
138 // profile directory and returns true if successful. | 149 // Returns the new URLIndexPrivateData which on success will contain the |
139 bool RestoreFromFile(const FilePath& file_path); | 150 // restored data but upon failure will be empty. |languages| will be used to |
| 151 // break URLs and page titles into words. |
| 152 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( |
| 153 const FilePath& path, |
| 154 const std::string& languages); |
140 | 155 |
141 // Constructs a new object by rebuilding its contents from the history | 156 // Constructs a new object by rebuilding its contents from the history |
142 // database in |history_db|. Returns the new URLIndexPrivateData which on | 157 // database in |history_db|. Returns the new URLIndexPrivateData which on |
143 // success will contain the rebuilt data but upon failure will be empty. | 158 // success will contain the rebuilt data but upon failure will be empty. |
144 static URLIndexPrivateData* RebuildFromHistory(HistoryDatabase* history_db); | 159 // |languages| gives a list of language encodings by which the URLs and page |
| 160 // titles are broken down into words and characters. |
| 161 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |
| 162 HistoryDatabase* history_db, |
| 163 const std::string& languages, |
| 164 const std::set<std::string>& scheme_whitelist); |
| 165 |
| 166 // Writes |private_data| as a cache file to |file_path| and returns success |
| 167 // via |succeeded|. |
| 168 static void WritePrivateDataToCacheFileTask( |
| 169 scoped_refptr<URLIndexPrivateData> private_data, |
| 170 const FilePath& file_path, |
| 171 scoped_refptr<RefCountedBool> succeeded); |
145 | 172 |
146 // Caches the index private data and writes the cache file to the profile | 173 // Caches the index private data and writes the cache file to the profile |
147 // directory. | 174 // directory. Called by WritePrivateDataToCacheFileTask. |
148 bool SaveToFile(const FilePath& file_path); | 175 bool SaveToFile(const FilePath& file_path); |
149 | 176 |
150 // Initializes all index data members in preparation for restoring the index | 177 // Initializes all index data members in preparation for restoring the index |
151 // from the cache or a complete rebuild from the history database. | 178 // from the cache or a complete rebuild from the history database. |
152 void Clear(); | 179 void Clear(); |
153 | 180 |
| 181 // Returns true if there is no data in the index. |
| 182 bool Empty() const; |
| 183 |
| 184 // Creates a copy of this object. |
| 185 scoped_refptr<URLIndexPrivateData> Duplicate() const; |
| 186 |
154 // Adds |word_id| to |history_id|'s entry in the history/word map, | 187 // Adds |word_id| to |history_id|'s entry in the history/word map, |
155 // creating a new entry if one does not already exist. | 188 // creating a new entry if one does not already exist. |
156 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | 189 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
157 | 190 |
158 // Given a set of Char16s, finds words containing those characters. | 191 // Given a set of Char16s, finds words containing those characters. |
159 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 192 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
160 | 193 |
161 // Initializes the whitelist of URL schemes. | |
162 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | |
163 | |
164 // URL History indexing support functions. | 194 // URL History indexing support functions. |
165 | 195 |
166 // Indexes one URL history item as described by |row|. Returns true if the | 196 // Indexes one URL history item as described by |row|. Returns true if the |
167 // row was actually indexed. | 197 // row was actually indexed. |languages| gives a list of language encodings by |
168 bool IndexRow(const URLRow& row); | 198 // which the URLs and page titles are broken down into words and characters. |
| 199 // |scheme_whitelist| is used to filter non-qualifying schemes. |
| 200 bool IndexRow(const URLRow& row, |
| 201 const std::string& languages, |
| 202 const std::set<std::string>& scheme_whitelist); |
169 | 203 |
170 // Adds the history item in |row| to the index if it does not already already | 204 // Adds the history item in |row| to the index if it does not already |
171 // exist and it meets the minimum 'quick' criteria. If the row already exists | 205 // exist and it meets the minimum 'quick' criteria. If the row already exists |
172 // in the index then the index will be updated if the row still meets the | 206 // in the index then the index will be updated if the row still meets the |
173 // criteria, otherwise the row will be removed from the index. Returns true | 207 // criteria, otherwise the row will be removed from the index. Returns true |
174 // if the index was actually updated. | 208 // if the index was actually updated. |languages| gives a list of language |
175 bool UpdateURL(const URLRow& row); | 209 // encodings by which the URLs and page titles are broken down into words and |
| 210 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. |
| 211 bool UpdateURL(const URLRow& row, |
| 212 const std::string& languages, |
| 213 const std::set<std::string>& scheme_whitelist); |
176 | 214 |
177 // Deletes indexing data for the history item with the URL given in |url|. | 215 // Deletes indexing data for the history item with the URL given in |url|. |
178 // The item may not have actually been indexed, which is the case if it did | 216 // The item may not have actually been indexed, which is the case if it did |
179 // not previously meet minimum 'quick' criteria. Returns true if the index | 217 // not previously meet minimum 'quick' criteria. Returns true if the index |
180 // was actually updated. | 218 // was actually updated. |
181 bool DeleteURL(const GURL& url); | 219 bool DeleteURL(const GURL& url); |
182 | 220 |
183 // Parses and indexes the words in the URL and page title of |row| and | 221 // Parses and indexes the words in the URL and page title of |row| and |
184 // calculate the word starts in each, saving the starts in |word_starts|. | 222 // calculates the word starts in each, saving the starts in |word_starts|. |
185 void AddRowWordsToIndex(const URLRow& row, RowWordStarts* word_starts); | 223 // |languages| gives a list of language encodings by which the URLs and page |
| 224 // titles are broken down into words and characters. |
| 225 void AddRowWordsToIndex(const URLRow& row, |
| 226 RowWordStarts* word_starts, |
| 227 const std::string& languages); |
186 | 228 |
187 // Removes |row| and all associated words and characters from the index. | 229 // Removes |row| and all associated words and characters from the index. |
188 void RemoveRowFromIndex(const URLRow& row); | 230 void RemoveRowFromIndex(const URLRow& row); |
189 | 231 |
190 // Removes all words and characters associated with |row| from the index. | 232 // Removes all words and characters associated with |row| from the index. |
191 void RemoveRowWordsFromIndex(const URLRow& row); | 233 void RemoveRowWordsFromIndex(const URLRow& row); |
192 | 234 |
193 // Given a single word in |uni_word|, adds a reference for the containing | 235 // Given a single word in |uni_word|, adds a reference for the containing |
194 // history item identified by |history_id| to the index. | 236 // history item identified by |history_id| to the index. |
195 void AddWordToIndex(const string16& uni_word, HistoryID history_id); | 237 void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
228 const string16& lower_string, | 270 const string16& lower_string, |
229 const String16Vector& terms_vector, | 271 const String16Vector& terms_vector, |
230 const RowWordStarts& word_starts); | 272 const RowWordStarts& word_starts); |
231 | 273 |
232 // Calculates a component score based on position, ordering and total | 274 // Calculates a component score based on position, ordering and total |
233 // substring match size using metrics recorded in |matches|. |max_length| | 275 // substring match size using metrics recorded in |matches|. |max_length| |
234 // is the length of the string against which the terms are being searched. | 276 // is the length of the string against which the terms are being searched. |
235 static int ScoreComponentForMatches(const TermMatches& matches, | 277 static int ScoreComponentForMatches(const TermMatches& matches, |
236 size_t max_length); | 278 size_t max_length); |
237 | 279 |
238 // Determines if |gurl| has a whitelisted scheme and returns true if so. | |
239 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | |
240 | |
241 // Sets the version of the cache file that will be saved when calling | |
242 // SavePrivateData(). For unit testing only. | |
243 void set_saved_cache_version(int version) { saved_cache_version_ = version; } | |
244 | |
245 // Encode a data structure into the protobuf |cache|. | 280 // Encode a data structure into the protobuf |cache|. |
246 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 281 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
247 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 282 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
248 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 283 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
249 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 284 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
250 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 285 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
251 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 286 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
252 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const; | 287 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const; |
253 | 288 |
254 // Decode a data structure from the protobuf |cache|. Return false if there | 289 // Decode a data structure from the protobuf |cache|. Return false if there |
255 // is any kind of failure. | 290 // is any kind of failure. |languages| will be used to break URLs and page |
256 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 291 // titles into words. |
| 292 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache, |
| 293 const std::string& languages); |
257 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 294 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
258 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 295 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
259 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 296 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
260 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 297 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
261 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 298 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
262 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache); | 299 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache, |
| 300 const std::string& languages); |
| 301 |
| 302 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
| 303 static bool URLSchemeIsWhitelisted(const GURL& gurl, |
| 304 const std::set<std::string>& whitelist); |
263 | 305 |
264 // Cache of search terms. | 306 // Cache of search terms. |
265 SearchTermCacheMap search_term_cache_; | 307 SearchTermCacheMap search_term_cache_; |
266 | 308 |
267 // Languages used during the word-breaking process during indexing. | |
268 std::string languages_; | |
269 | |
270 // Only URLs with a whitelisted scheme are indexed. | |
271 std::set<std::string> scheme_whitelist_; | |
272 | |
273 // Start of data members that are cached ------------------------------------- | 309 // Start of data members that are cached ------------------------------------- |
274 | 310 |
275 // The version of the cache file most recently used to restore this instance | 311 // The version of the cache file most recently used to restore this instance |
276 // of the private data. If the private data was rebuilt from the history | 312 // of the private data. If the private data was rebuilt from the history |
277 // database this will be 0. | 313 // database this will be 0. |
278 int restored_cache_version_; | 314 int restored_cache_version_; |
279 | 315 |
280 // A list of all of indexed words. The index of a word in this list is the | 316 // A list of all indexed words. The index of a word in this list is the |
281 // ID of the word in the word_map_. It reduces the memory overhead by | 317 // ID of the word in the word_map_. It reduces the memory overhead by |
282 // replacing a potentially long and repeated string with a simple index. | 318 // replacing a potentially long and repeated string with a simple index. |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
327 // Used for unit testing only. Records the number of candidate history items | 363 // Used for unit testing only. Records the number of candidate history items |
328 // at three stages in the index searching process. | 364 // at three stages in the index searching process. |
329 size_t pre_filter_item_count_; // After word index is queried. | 365 size_t pre_filter_item_count_; // After word index is queried. |
330 size_t post_filter_item_count_; // After trimming large result set. | 366 size_t post_filter_item_count_; // After trimming large result set. |
331 size_t post_scoring_item_count_; // After performing final filter/scoring. | 367 size_t post_scoring_item_count_; // After performing final filter/scoring. |
332 }; | 368 }; |
333 | 369 |
334 } // namespace history | 370 } // namespace history |
335 | 371 |
336 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 372 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |