OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include "base/file_path.h" | 9 #include "base/file_path.h" |
10 #include "base/gtest_prod_util.h" | 10 #include "base/gtest_prod_util.h" |
| 11 #include "base/memory/ref_counted.h" |
11 #include "chrome/browser/history/in_memory_url_index_types.h" | 12 #include "chrome/browser/history/in_memory_url_index_types.h" |
12 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" | 13 #include "chrome/browser/history/in_memory_url_index_cache.pb.h" |
| 14 #include "content/public/browser/notification_details.h" |
13 | 15 |
14 class HistoryQuickProviderTest; | 16 class HistoryQuickProviderTest; |
15 | 17 |
16 namespace in_memory_url_index { | 18 namespace in_memory_url_index { |
17 class InMemoryURLIndexCacheItem; | 19 class InMemoryURLIndexCacheItem; |
18 } | 20 } |
19 | 21 |
20 namespace history { | 22 namespace history { |
21 | 23 |
22 namespace imui = in_memory_url_index; | 24 namespace imui = in_memory_url_index; |
23 | 25 |
24 class HistoryDatabase; | 26 class HistoryDatabase; |
| 27 class InMemoryURLIndex; |
| 28 class RefCountedBool; |
25 | 29 |
26 // A structure describing the InMemoryURLIndex's internal data and providing for | 30 // A structure describing the InMemoryURLIndex's internal data and providing for |
27 // restoring, rebuilding and updating that internal data. | 31 // restoring, rebuilding and updating that internal data. |
28 class URLIndexPrivateData { | 32 class URLIndexPrivateData |
| 33 : public base::RefCountedThreadSafe<URLIndexPrivateData> { |
29 public: | 34 public: |
30 URLIndexPrivateData(); | 35 URLIndexPrivateData(); |
| 36 |
| 37 private: |
| 38 friend class base::RefCountedThreadSafe<URLIndexPrivateData>; |
31 ~URLIndexPrivateData(); | 39 ~URLIndexPrivateData(); |
32 | 40 |
33 private: | |
34 friend class AddHistoryMatch; | 41 friend class AddHistoryMatch; |
35 friend class ::HistoryQuickProviderTest; | 42 friend class ::HistoryQuickProviderTest; |
36 friend class InMemoryURLIndex; | 43 friend class InMemoryURLIndex; |
37 friend class InMemoryURLIndexTest; | 44 friend class InMemoryURLIndexTest; |
38 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); | 45 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, CacheSaveRestore); |
39 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); | 46 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, HugeResultSet); |
40 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); | 47 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, Scoring); |
41 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); | 48 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TitleSearch); |
42 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); | 49 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, TypedCharacterCaching); |
43 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); | 50 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, WhitelistedURLs); |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
119 // or page title. Scores are then calculated on no more than | 126 // or page title. Scores are then calculated on no more than |
120 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of | 127 // |kItemsToScoreLimit| candidates, as the scoring of such a large number of |
121 // candidates may cause perceptible typing response delays in the omnibox. | 128 // candidates may cause perceptible typing response delays in the omnibox. |
122 // This is likely to occur for short omnibox terms such as 'h' and 'w' which | 129 // This is likely to occur for short omnibox terms such as 'h' and 'w' which |
123 // will be found in nearly all history candidates. Results are sorted by | 130 // will be found in nearly all history candidates. Results are sorted by |
124 // descending score. The full results set (i.e. beyond the | 131 // descending score. The full results set (i.e. beyond the |
125 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | 132 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
126 // to this function. | 133 // to this function. |
127 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); | 134 ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string); |
128 | 135 |
129 // Sets the |languages| to a list of language encodings with which the history | 136 // Creates a new URLIndexPrivateData object, populates it from the contents |
130 // URLs and omnibox searches are interpreted, i.e. how each is broken | 137 // of the cache file stored in |file_path|, and assigns it to |
131 // down into words and each word is broken down into characters. | 138 // |private_data_ptr|. |
132 void set_languages(const std::string& languages) { languages_ = languages; } | 139 static void RestoreFromFileTask( |
| 140 const FilePath& file_path, |
| 141 scoped_refptr<URLIndexPrivateData> private_data_ptr); |
133 | 142 |
134 // Restores the index's private data from the cache file stored in the | 143 // Constructs a new object by restoring its contents from the file at |path|. |
135 // profile directory and returns true if successful. | 144 // Returns the new URLIndexPrivateData which on success will contain the |
136 bool RestoreFromFile(const FilePath& file_path); | 145 // restored data but upon failure will be empty. |
| 146 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( |
| 147 const FilePath& path); |
137 | 148 |
138 // Constructs a new object by rebuilding its contents from the history | 149 // Constructs a new object by rebuilding its contents from the history |
139 // database in |history_db|. Returns the new URLIndexPrivateData which on | 150 // database in |history_db|. Returns the new URLIndexPrivateData which on |
140 // success will contain the rebuilt data but upon failure will be empty. | 151 // success will contain the rebuilt data but upon failure will be empty. |
141 static URLIndexPrivateData* RebuildFromHistory(HistoryDatabase* history_db); | 152 // |languages| gives a list of language encodings by which the URLs and page |
| 153 // titles are broken down into words and characters. |
| 154 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |
| 155 HistoryDatabase* history_db, |
| 156 const std::string& languages, |
| 157 const std::set<std::string>& scheme_whitelist); |
| 158 |
| 159 // Writes |private_data| as a cache file to |file_path| and returns success |
| 160 // via |succeeded|. |
| 161 static void WritePrivateDataToCacheFileTask( |
| 162 scoped_refptr<URLIndexPrivateData> private_data, |
| 163 const FilePath& file_path, |
| 164 scoped_refptr<RefCountedBool> succeeded); |
142 | 165 |
143 // Caches the index private data and writes the cache file to the profile | 166 // Caches the index private data and writes the cache file to the profile |
144 // directory. | 167 // directory. Called by WritePrivateDataToCacheFileTask. |
145 bool SaveToFile(const FilePath& file_path); | 168 bool SaveToFile(const FilePath& file_path); |
146 | 169 |
147 // Initializes all index data members in preparation for restoring the index | 170 // Initializes all index data members in preparation for restoring the index |
148 // from the cache or a complete rebuild from the history database. | 171 // from the cache or a complete rebuild from the history database. |
149 void Clear(); | 172 void Clear(); |
150 | 173 |
| 174 // Returns true if there is no data in the index. |
| 175 bool Empty() const; |
| 176 |
| 177 // Creates a copy of this object. |
| 178 URLIndexPrivateData* Duplicate() const; |
| 179 |
151 // Adds |word_id| to |history_id|'s entry in the history/word map, | 180 // Adds |word_id| to |history_id|'s entry in the history/word map, |
152 // creating a new entry if one does not already exist. | 181 // creating a new entry if one does not already exist. |
153 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | 182 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
154 | 183 |
155 // Given a set of Char16s, finds words containing those characters. | 184 // Given a set of Char16s, finds words containing those characters. |
156 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 185 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
157 | 186 |
158 // Initializes the whitelist of URL schemes. | |
159 static void InitializeSchemeWhitelist(std::set<std::string>* whitelist); | |
160 | |
161 // URL History indexing support functions. | 187 // URL History indexing support functions. |
162 | 188 |
163 // Indexes one URL history item as described by |row|. Returns true if the | 189 // Indexes one URL history item as described by |row|. Returns true if the |
164 // row was actually indexed. | 190 // row was actually indexed. |languages| gives a list of language encodings by |
165 bool IndexRow(const URLRow& row); | 191 // which the URLs and page titles are broken down into words and characters. |
| 192 // |scheme_whitelist| is used to filter non-qualifying schemes. |
| 193 bool IndexRow(const URLRow& row, |
| 194 const std::string& languages, |
| 195 const std::set<std::string>& scheme_whitelist); |
166 | 196 |
167 // Adds the history item in |row| to the index if it does not already | 197 // Adds the history item in |row| to the index if it does not already |
168 // exist and it meets the minimum 'quick' criteria. If the row already exists | 198 // exist and it meets the minimum 'quick' criteria. If the row already exists |
169 // in the index then the index will be updated if the row still meets the | 199 // in the index then the index will be updated if the row still meets the |
170 // criteria, otherwise the row will be removed from the index. Returns true | 200 // criteria, otherwise the row will be removed from the index. Returns true |
171 // if the index was actually updated. | 201 // if the index was actually updated. |languages| gives a list of language |
172 bool UpdateURL(const URLRow& row); | 202 // encodings by which the URLs and page titles are broken down into words and |
| 203 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. |
| 204 bool UpdateURL(const URLRow& row, |
| 205 const std::string& languages, |
| 206 const std::set<std::string>& scheme_whitelist); |
173 | 207 |
174 // Deletes indexing data for the history item with the URL given in |url|. | 208 // Deletes indexing data for the history item with the URL given in |url|. |
175 // The item may not have actually been indexed, which is the case if it did | 209 // The item may not have actually been indexed, which is the case if it did |
176 // not previously meet minimum 'quick' criteria. Returns true if the index | 210 // not previously meet minimum 'quick' criteria. Returns true if the index |
177 // was actually updated. | 211 // was actually updated. |
178 bool DeleteURL(const GURL& url); | 212 bool DeleteURL(const GURL& url); |
179 | 213 |
180 // Parses and indexes the words in the URL and page title of |row|. | 214 // Parses and indexes the words in the URL and page title of |row|. |
181 void AddRowWordsToIndex(const URLRow& row); | 215 // |languages| gives a list of language encodings by which the URLs and page |
| 216 // titles are broken down into words and characters. |
| 217 void AddRowWordsToIndex(const URLRow& row, |
| 218 const std::string& languages); |
182 | 219 |
183 // Removes |row| and all associated words and characters from the index. | 220 // Removes |row| and all associated words and characters from the index. |
184 void RemoveRowFromIndex(const URLRow& row); | 221 void RemoveRowFromIndex(const URLRow& row); |
185 | 222 |
186 // Removes all words and characters associated with |row| from the index. | 223 // Removes all words and characters associated with |row| from the index. |
187 void RemoveRowWordsFromIndex(const URLRow& row); | 224 void RemoveRowWordsFromIndex(const URLRow& row); |
188 | 225 |
189 // Given a single word in |uni_word|, adds a reference for the containing | 226 // Given a single word in |uni_word|, adds a reference for the containing |
190 // history item identified by |history_id| to the index. | 227 // history item identified by |history_id| to the index. |
191 void AddWordToIndex(const string16& uni_word, HistoryID history_id); | 228 void AddWordToIndex(const string16& uni_word, HistoryID history_id); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
223 const URLRow& row, | 260 const URLRow& row, |
224 const string16& lower_string, | 261 const string16& lower_string, |
225 const String16Vector& terms_vector); | 262 const String16Vector& terms_vector); |
226 | 263 |
227 // Calculates a component score based on position, ordering and total | 264 // Calculates a component score based on position, ordering and total |
228 // substring match size using metrics recorded in |matches|. |max_length| | 265 // substring match size using metrics recorded in |matches|. |max_length| |
229 // is the length of the string against which the terms are being searched. | 266 // is the length of the string against which the terms are being searched. |
230 static int ScoreComponentForMatches(const TermMatches& matches, | 267 static int ScoreComponentForMatches(const TermMatches& matches, |
231 size_t max_length); | 268 size_t max_length); |
232 | 269 |
233 // Determines if |gurl| has a whitelisted scheme and returns true if so. | |
234 bool URLSchemeIsWhitelisted(const GURL& gurl) const; | |
235 | |
236 // Encode a data structure into the protobuf |cache|. | 270 // Encode a data structure into the protobuf |cache|. |
237 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 271 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; |
238 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 272 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; |
239 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 273 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
240 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 274 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; |
241 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 275 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; |
242 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 276 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; |
243 | 277 |
244 // Decode a data structure from the protobuf |cache|. Return false if there | 278 // Decode a data structure from the protobuf |cache|. Return false if there |
245 // is any kind of failure. | 279 // is any kind of failure. |
246 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); | 280 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache); |
247 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 281 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); |
248 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 282 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
249 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 283 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); |
250 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 284 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); |
251 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 285 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); |
252 | 286 |
| 287 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
| 288 static bool URLSchemeIsWhitelisted(const GURL& gurl, |
| 289 const std::set<std::string>& whitelist); |
| 290 |
253 // Cache of search terms. | 291 // Cache of search terms. |
254 SearchTermCacheMap search_term_cache_; | 292 SearchTermCacheMap search_term_cache_; |
255 | 293 |
256 // Languages used during the word-breaking process during indexing. | |
257 std::string languages_; | |
258 | |
259 // Only URLs with a whitelisted scheme are indexed. | |
260 std::set<std::string> scheme_whitelist_; | |
261 | |
262 // Start of data members that are cached ------------------------------------- | 294 // Start of data members that are cached ------------------------------------- |
263 | 295 |
264 // A list of all indexed words. The index of a word in this list is the | 296 // A list of all indexed words. The index of a word in this list is the |
265 // ID of the word in the word_map_. It reduces the memory overhead by | 297 // ID of the word in the word_map_. It reduces the memory overhead by |
266 // replacing a potentially long and repeated string with a simple index. | 298 // replacing a potentially long and repeated string with a simple index. |
267 String16Vector word_list_; | 299 String16Vector word_list_; |
268 | 300 |
269 // A list of available word slots in |word_list_|. An available word slot | 301 // A list of available word slots in |word_list_|. An available word slot |
270 // is the index of an unused word in the word_list_ vector, also referred to as | 302 // is the index of an unused word in the word_list_ vector, also referred to as |
271 // a WordID. As URL visits are added or modified new words may be added to | 303 // a WordID. As URL visits are added or modified new words may be added to |
(...skipping 30 matching lines...) Expand all Loading... |
302 // Used for unit testing only. Records the number of candidate history items | 334 // Used for unit testing only. Records the number of candidate history items |
303 // at three stages in the index searching process. | 335 // at three stages in the index searching process. |
304 size_t pre_filter_item_count_; // After word index is queried. | 336 size_t pre_filter_item_count_; // After word index is queried. |
305 size_t post_filter_item_count_; // After trimming large result set. | 337 size_t post_filter_item_count_; // After trimming large result set. |
306 size_t post_scoring_item_count_; // After performing final filter/scoring. | 338 size_t post_scoring_item_count_; // After performing final filter/scoring. |
307 }; | 339 }; |
308 | 340 |
309 } // namespace history | 341 } // namespace history |
310 | 342 |
311 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ | 343 #endif // CHROME_BROWSER_HISTORY_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |