OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_AUTOCOMPLETE_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef CHROME_BROWSER_AUTOCOMPLETE_URL_INDEX_PRIVATE_DATA_H_ |
6 #define CHROME_BROWSER_AUTOCOMPLETE_URL_INDEX_PRIVATE_DATA_H_ | 6 #define CHROME_BROWSER_AUTOCOMPLETE_URL_INDEX_PRIVATE_DATA_H_ |
7 | 7 |
8 #include <set> | 8 #include <set> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/files/file_path.h" | 11 #include "base/files/file_path.h" |
12 #include "base/gtest_prod_util.h" | 12 #include "base/gtest_prod_util.h" |
13 #include "base/memory/ref_counted.h" | 13 #include "base/memory/ref_counted.h" |
14 #include "chrome/browser/autocomplete/in_memory_url_index_cache.pb.h" | 14 #include "chrome/browser/autocomplete/in_memory_url_index_cache.pb.h" |
15 #include "chrome/browser/autocomplete/in_memory_url_index_types.h" | 15 #include "chrome/browser/autocomplete/in_memory_url_index_types.h" |
16 #include "chrome/browser/autocomplete/scored_history_match.h" | 16 #include "chrome/browser/autocomplete/scored_history_match.h" |
17 #include "chrome/browser/history/history_service.h" | 17 #include "chrome/browser/history/history_service.h" |
18 | 18 |
19 class HistoryQuickProviderTest; | 19 class HistoryQuickProviderTest; |
20 | 20 |
21 namespace in_memory_url_index { | 21 namespace in_memory_url_index { |
22 class InMemoryURLIndexCacheItem; | 22 class InMemoryURLIndexCacheItem; |
23 } | 23 } |
24 | 24 |
25 namespace history { | 25 namespace history { |
26 | |
27 namespace imui = in_memory_url_index; | |
28 | |
29 class HistoryDatabase; | 26 class HistoryDatabase; |
30 class InMemoryURLIndex; | 27 class InMemoryURLIndex; |
31 class RefCountedBool; | 28 class RefCountedBool; |
| 29 } |
32 | 30 |
33 // Current version of the cache file. | 31 // Current version of the cache file. |
34 static const int kCurrentCacheFileVersion = 5; | 32 static const int kCurrentCacheFileVersion = 5; |
35 | 33 |
36 // A structure private to InMemoryURLIndex describing its internal data and | 34 // A structure private to InMemoryURLIndex describing its internal data and |
37 // providing for restoring, rebuilding and updating that internal data. As | 35 // providing for restoring, rebuilding and updating that internal data. As |
38 // this class is for exclusive use by the InMemoryURLIndex class there should | 36 // this class is for exclusive use by the InMemoryURLIndex class there should |
39 // be no calls from any other class. | 37 // be no calls from any other class. |
40 // | 38 // |
41 // All public member functions are called on the main thread unless otherwise | 39 // All public member functions are called on the main thread unless otherwise |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
74 // Adds the history item in |row| to the index if it does not already | 72 // Adds the history item in |row| to the index if it does not already |
75 // exist and it meets the minimum 'quick' criteria. If the row already exists | 73 // exist and it meets the minimum 'quick' criteria. If the row already exists |
76 // in the index then the index will be updated if the row still meets the | 74 // in the index then the index will be updated if the row still meets the |
77 // criteria, otherwise the row will be removed from the index. Returns true | 75 // criteria, otherwise the row will be removed from the index. Returns true |
78 // if the index was actually updated. |languages| gives a list of language | 76 // if the index was actually updated. |languages| gives a list of language |
79 // encodings by which the URLs and page titles are broken down into words and | 77 // encodings by which the URLs and page titles are broken down into words and |
80 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. | 78 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. |
81 // |history_service| is used to schedule an update to the recent visits | 79 // |history_service| is used to schedule an update to the recent visits |
82 // component of this URL's entry in the index. | 80 // component of this URL's entry in the index. |
83 bool UpdateURL(HistoryService* history_service, | 81 bool UpdateURL(HistoryService* history_service, |
84 const URLRow& row, | 82 const history::URLRow& row, |
85 const std::string& languages, | 83 const std::string& languages, |
86 const std::set<std::string>& scheme_whitelist, | 84 const std::set<std::string>& scheme_whitelist, |
87 base::CancelableTaskTracker* tracker); | 85 base::CancelableTaskTracker* tracker); |
88 | 86 |
89 // Updates the entry for |url_id| in the index, replacing its | 87 // Updates the entry for |url_id| in the index, replacing its |
90 // recent visits information with |recent_visits|. If |url_id| | 88 // recent visits information with |recent_visits|. If |url_id| |
91 // is not in the index, does nothing. | 89 // is not in the index, does nothing. |
92 void UpdateRecentVisits(URLID url_id, | 90 void UpdateRecentVisits(history::URLID url_id, |
93 const VisitVector& recent_visits); | 91 const history::VisitVector& recent_visits); |
94 | 92 |
95 // Using |history_service| schedules an update (using the historyDB | 93 // Using |history_service| schedules an update (using the historyDB |
96 // thread) for the recent visits information for |url_id|. Unless | 94 // thread) for the recent visits information for |url_id|. Unless |
97 // something unexpectedly goes wrong, UpdateRecentVisits() should | 95 // something unexpectedly goes wrong, UpdateRecentVisits() should |
98 // eventually be called from a callback. | 96 // eventually be called from a callback. |
99 void ScheduleUpdateRecentVisits(HistoryService* history_service, | 97 void ScheduleUpdateRecentVisits(HistoryService* history_service, |
100 URLID url_id, | 98 history::URLID url_id, |
101 base::CancelableTaskTracker* tracker); | 99 base::CancelableTaskTracker* tracker); |
102 | 100 |
103 // Deletes index data for the history item with the given |url|. | 101 // Deletes index data for the history item with the given |url|. |
104 // The item may not have actually been indexed, which is the case if it did | 102 // The item may not have actually been indexed, which is the case if it did |
105 // not previously meet minimum 'quick' criteria. Returns true if the index | 103 // not previously meet minimum 'quick' criteria. Returns true if the index |
106 // was actually updated. | 104 // was actually updated. |
107 bool DeleteURL(const GURL& url); | 105 bool DeleteURL(const GURL& url); |
108 | 106 |
109 // Constructs a new object by restoring its contents from the cache file | 107 // Constructs a new object by restoring its contents from the cache file |
110 // at |path|. Returns the new URLIndexPrivateData which on success will | 108 // at |path|. Returns the new URLIndexPrivateData which on success will |
111 // contain the restored data but upon failure will be empty. |languages| | 109 // contain the restored data but upon failure will be empty. |languages| |
112 // is used to break URLs and page titles into words. This function | 110 // is used to break URLs and page titles into words. This function |
113 // should be run on the file thread. | 111 // should be run on the file thread. |
114 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( | 112 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( |
115 const base::FilePath& path, | 113 const base::FilePath& path, |
116 const std::string& languages); | 114 const std::string& languages); |
117 | 115 |
118 // Constructs a new object by rebuilding its contents from the history | 116 // Constructs a new object by rebuilding its contents from the history |
119 // database in |history_db|. Returns the new URLIndexPrivateData which on | 117 // database in |history_db|. Returns the new URLIndexPrivateData which on |
120 // success will contain the rebuilt data but upon failure will be empty. | 118 // success will contain the rebuilt data but upon failure will be empty. |
121 // |languages| gives a list of language encodings by which the URLs and page | 119 // |languages| gives a list of language encodings by which the URLs and page |
122 // titles are broken down into words and characters. | 120 // titles are broken down into words and characters. |
123 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( | 121 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |
124 HistoryDatabase* history_db, | 122 history::HistoryDatabase* history_db, |
125 const std::string& languages, | 123 const std::string& languages, |
126 const std::set<std::string>& scheme_whitelist); | 124 const std::set<std::string>& scheme_whitelist); |
127 | 125 |
128 // Writes |private_data| as a cache file to |file_path| and returns success. | 126 // Writes |private_data| as a cache file to |file_path| and returns success. |
129 static bool WritePrivateDataToCacheFileTask( | 127 static bool WritePrivateDataToCacheFileTask( |
130 scoped_refptr<URLIndexPrivateData> private_data, | 128 scoped_refptr<URLIndexPrivateData> private_data, |
131 const base::FilePath& file_path); | 129 const base::FilePath& file_path); |
132 | 130 |
133 // Creates a copy of ourself. | 131 // Creates a copy of ourself. |
134 scoped_refptr<URLIndexPrivateData> Duplicate() const; | 132 scoped_refptr<URLIndexPrivateData> Duplicate() const; |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
219 // candidate results set is too large. | 217 // candidate results set is too large. |
220 class HistoryItemFactorGreater | 218 class HistoryItemFactorGreater |
221 : public std::binary_function<HistoryID, HistoryID, void> { | 219 : public std::binary_function<HistoryID, HistoryID, void> { |
222 public: | 220 public: |
223 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); | 221 explicit HistoryItemFactorGreater(const HistoryInfoMap& history_info_map); |
224 ~HistoryItemFactorGreater(); | 222 ~HistoryItemFactorGreater(); |
225 | 223 |
226 bool operator()(const HistoryID h1, const HistoryID h2); | 224 bool operator()(const HistoryID h1, const HistoryID h2); |
227 | 225 |
228 private: | 226 private: |
229 const history::HistoryInfoMap& history_info_map_; | 227 const HistoryInfoMap& history_info_map_; |
230 }; | 228 }; |
231 | 229 |
232 // URL History indexing support functions. | 230 // URL History indexing support functions. |
233 | 231 |
234 // Composes a set of history item IDs by intersecting the set for each word | 232 // Composes a set of history item IDs by intersecting the set for each word |
235 // in |unsorted_words|. | 233 // in |unsorted_words|. |
236 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); | 234 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
237 | 235 |
238 // Helper function to HistoryIDSetFromWords which composes a set of history | 236 // Helper function to HistoryIDSetFromWords which composes a set of history |
239 // ids for the term given in |term|. | 237 // ids for the term given in |term|. |
240 HistoryIDSet HistoryIDsForTerm(const base::string16& term); | 238 HistoryIDSet HistoryIDsForTerm(const base::string16& term); |
241 | 239 |
242 // Given a set of Char16s, finds words containing those characters. | 240 // Given a set of Char16s, finds words containing those characters. |
243 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 241 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
244 | 242 |
245 // Indexes one URL history item as described by |row|. Returns true if the | 243 // Indexes one URL history item as described by |row|. Returns true if the |
246 // row was actually indexed. |languages| gives a list of language encodings by | 244 // row was actually indexed. |languages| gives a list of language encodings by |
247 // which the URLs and page titles are broken down into words and characters. | 245 // which the URLs and page titles are broken down into words and characters. |
248 // |scheme_whitelist| is used to filter non-qualifying schemes. If | 246 // |scheme_whitelist| is used to filter non-qualifying schemes. If |
249 // |history_db| is not NULL then this function uses the history database | 247 // |history_db| is not NULL then this function uses the history database |
250 // synchronously to get the URL's recent visits information. This mode should | 248 // synchronously to get the URL's recent visits information. This mode should |
251 // only be used on the historyDB thread. If |history_db| is NULL, then | 249 // only be used on the historyDB thread. If |history_db| is NULL, then |
252 // this function uses |history_service| to schedule a task on the | 250 // this function uses |history_service| to schedule a task on the |
253 // historyDB thread to fetch and update the recent visits | 251 // historyDB thread to fetch and update the recent visits |
254 // information. | 252 // information. |
255 bool IndexRow(HistoryDatabase* history_db, | 253 bool IndexRow(history::HistoryDatabase* history_db, |
256 HistoryService* history_service, | 254 HistoryService* history_service, |
257 const URLRow& row, | 255 const history::URLRow& row, |
258 const std::string& languages, | 256 const std::string& languages, |
259 const std::set<std::string>& scheme_whitelist, | 257 const std::set<std::string>& scheme_whitelist, |
260 base::CancelableTaskTracker* tracker); | 258 base::CancelableTaskTracker* tracker); |
261 | 259 |
262 // Parses and indexes the words in the URL and page title of |row| and | 260 // Parses and indexes the words in the URL and page title of |row| and |
263 // calculates the word starts in each, saving the starts in |word_starts|. | 261 // calculates the word starts in each, saving the starts in |word_starts|. |
264 // |languages| gives a list of language encodings by which the URLs and page | 262 // |languages| gives a list of language encodings by which the URLs and page |
265 // titles are broken down into words and characters. | 263 // titles are broken down into words and characters. |
266 void AddRowWordsToIndex(const URLRow& row, | 264 void AddRowWordsToIndex(const history::URLRow& row, |
267 RowWordStarts* word_starts, | 265 RowWordStarts* word_starts, |
268 const std::string& languages); | 266 const std::string& languages); |
269 | 267 |
270 // Given a single word in |uni_word|, adds a reference for the containing | 268 // Given a single word in |uni_word|, adds a reference for the containing |
271 // history item identified by |history_id| to the index. | 269 // history item identified by |history_id| to the index. |
272 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); | 270 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); |
273 | 271 |
274 // Creates a new entry in the word/history map for |word_id| and adds | 272 // Creates a new entry in the word/history map for |word_id| and adds |
275 // |history_id| as the initial element of the word's set. | 273 // |history_id| as the initial element of the word's set. |
276 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); | 274 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); |
277 | 275 |
278 // Updates an existing entry in the word/history index by adding the | 276 // Updates an existing entry in the word/history index by adding the |
279 // |history_id| to the set for |word_id| in the word_id_history_map_. | 277 // |history_id| to the set for |word_id| in the word_id_history_map_. |
280 void UpdateWordHistory(WordID word_id, HistoryID history_id); | 278 void UpdateWordHistory(WordID word_id, HistoryID history_id); |
281 | 279 |
282 // Adds |word_id| to |history_id|'s entry in the history/word map, | 280 // Adds |word_id| to |history_id|'s entry in the history/word map, |
283 // creating a new entry if one does not already exist. | 281 // creating a new entry if one does not already exist. |
284 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | 282 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); |
285 | 283 |
286 // Removes |row| and all associated words and characters from the index. | 284 // Removes |row| and all associated words and characters from the index. |
287 void RemoveRowFromIndex(const URLRow& row); | 285 void RemoveRowFromIndex(const history::URLRow& row); |
288 | 286 |
289 // Removes all words and characters associated with |row| from the index. | 287 // Removes all words and characters associated with |row| from the index. |
290 void RemoveRowWordsFromIndex(const URLRow& row); | 288 void RemoveRowWordsFromIndex(const history::URLRow& row); |
291 | 289 |
292 // Clears |used_| for each item in the search term cache. | 290 // Clears |used_| for each item in the search term cache. |
293 void ResetSearchTermCache(); | 291 void ResetSearchTermCache(); |
294 | 292 |
295 // Caches the index private data and writes the cache file to the profile | 293 // Caches the index private data and writes the cache file to the profile |
296 // directory. Called by WritePrivateDataToCacheFileTask. | 294 // directory. Called by WritePrivateDataToCacheFileTask. |
297 bool SaveToFile(const base::FilePath& file_path); | 295 bool SaveToFile(const base::FilePath& file_path); |
298 | 296 |
299 // Encode a data structure into the protobuf |cache|. | 297 // Encode a data structure into the protobuf |cache|. |
300 void SavePrivateData(imui::InMemoryURLIndexCacheItem* cache) const; | 298 void SavePrivateData( |
301 void SaveWordList(imui::InMemoryURLIndexCacheItem* cache) const; | 299 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
302 void SaveWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 300 void SaveWordList( |
303 void SaveCharWordMap(imui::InMemoryURLIndexCacheItem* cache) const; | 301 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
304 void SaveWordIDHistoryMap(imui::InMemoryURLIndexCacheItem* cache) const; | 302 void SaveWordMap(in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
305 void SaveHistoryInfoMap(imui::InMemoryURLIndexCacheItem* cache) const; | 303 void SaveCharWordMap( |
306 void SaveWordStartsMap(imui::InMemoryURLIndexCacheItem* cache) const; | 304 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
| 305 void SaveWordIDHistoryMap( |
| 306 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
| 307 void SaveHistoryInfoMap( |
| 308 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
| 309 void SaveWordStartsMap( |
| 310 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
307 | 311 |
308 // Decode a data structure from the protobuf |cache|. Return false if there | 312 // Decode a data structure from the protobuf |cache|. Return false if there |
309 // is any kind of failure. |languages| will be used to break URLs and page | 313 // is any kind of failure. |languages| will be used to break URLs and page |
310 // titles into words. | 314 // titles into words. |
311 bool RestorePrivateData(const imui::InMemoryURLIndexCacheItem& cache, | 315 bool RestorePrivateData( |
312 const std::string& languages); | 316 const in_memory_url_index::InMemoryURLIndexCacheItem& cache, |
313 bool RestoreWordList(const imui::InMemoryURLIndexCacheItem& cache); | 317 const std::string& languages); |
314 bool RestoreWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 318 bool RestoreWordList( |
315 bool RestoreCharWordMap(const imui::InMemoryURLIndexCacheItem& cache); | 319 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
316 bool RestoreWordIDHistoryMap(const imui::InMemoryURLIndexCacheItem& cache); | 320 bool RestoreWordMap( |
317 bool RestoreHistoryInfoMap(const imui::InMemoryURLIndexCacheItem& cache); | 321 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
318 bool RestoreWordStartsMap(const imui::InMemoryURLIndexCacheItem& cache, | 322 bool RestoreCharWordMap( |
319 const std::string& languages); | 323 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 324 bool RestoreWordIDHistoryMap( |
| 325 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 326 bool RestoreHistoryInfoMap( |
| 327 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
| 328 bool RestoreWordStartsMap( |
| 329 const in_memory_url_index::InMemoryURLIndexCacheItem& cache, |
| 330 const std::string& languages); |
320 | 331 |
321 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 332 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
322 static bool URLSchemeIsWhitelisted(const GURL& gurl, | 333 static bool URLSchemeIsWhitelisted(const GURL& gurl, |
323 const std::set<std::string>& whitelist); | 334 const std::set<std::string>& whitelist); |
324 | 335 |
325 // Cache of search terms. | 336 // Cache of search terms. |
326 SearchTermCacheMap search_term_cache_; | 337 SearchTermCacheMap search_term_cache_; |
327 | 338 |
328 // Start of data members that are cached ------------------------------------- | 339 // Start of data members that are cached ------------------------------------- |
329 | 340 |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
382 // restore. | 393 // restore. |
383 int saved_cache_version_; | 394 int saved_cache_version_; |
384 | 395 |
385 // Used for unit testing only. Records the number of candidate history items | 396 // Used for unit testing only. Records the number of candidate history items |
386 // at three stages in the index searching process. | 397 // at three stages in the index searching process. |
387 size_t pre_filter_item_count_; // After word index is queried. | 398 size_t pre_filter_item_count_; // After word index is queried. |
388 size_t post_filter_item_count_; // After trimming large result set. | 399 size_t post_filter_item_count_; // After trimming large result set. |
389 size_t post_scoring_item_count_; // After performing final filter/scoring. | 400 size_t post_scoring_item_count_; // After performing final filter/scoring. |
390 }; | 401 }; |
391 | 402 |
392 } // namespace history | |
393 | |
394 #endif // CHROME_BROWSER_AUTOCOMPLETE_URL_INDEX_PRIVATE_DATA_H_ | 403 #endif // CHROME_BROWSER_AUTOCOMPLETE_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |