OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
7 | 7 |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include <set> | 10 #include <set> |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
59 // set). Once we have a set of candidates, they are filtered to ensure | 59 // set). Once we have a set of candidates, they are filtered to ensure |
60 // that all |term_string| terms, as separated by whitespace and the | 60 // that all |term_string| terms, as separated by whitespace and the |
61 // cursor (if set), occur within the candidate's URL or page title. | 61 // cursor (if set), occur within the candidate's URL or page title. |
62 // Scores are then calculated on no more than |kItemsToScoreLimit| | 62 // Scores are then calculated on no more than |kItemsToScoreLimit| |
63 // candidates, as the scoring of such a large number of candidates may | 63 // candidates, as the scoring of such a large number of candidates may |
64 // cause perceptible typing response delays in the omnibox. This is | 64 // cause perceptible typing response delays in the omnibox. This is |
65 // likely to occur for short omnibox terms such as 'h' and 'w' which | 65 // likely to occur for short omnibox terms such as 'h' and 'w' which |
66 // will be found in nearly all history candidates. Results are sorted by | 66 // will be found in nearly all history candidates. Results are sorted by |
67 // descending score. The full results set (i.e. beyond the | 67 // descending score. The full results set (i.e. beyond the |
68 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls | 68 // |kItemsToScoreLimit| limit) will be retained and used for subsequent calls |
69 // to this function. |languages| is used to help parse/format the URLs in the | 69 // to this function. In total, |max_matches| of items will be returned in the |
70 // history index. In total, |max_matches| of items will be returned in the | |
71 // |ScoredHistoryMatches| vector. | 70 // |ScoredHistoryMatches| vector. |
72 ScoredHistoryMatches HistoryItemsForTerms( | 71 ScoredHistoryMatches HistoryItemsForTerms( |
73 base::string16 term_string, | 72 base::string16 term_string, |
74 size_t cursor_position, | 73 size_t cursor_position, |
75 size_t max_matches, | 74 size_t max_matches, |
76 const std::string& languages, | |
77 bookmarks::BookmarkModel* bookmark_model, | 75 bookmarks::BookmarkModel* bookmark_model, |
78 TemplateURLService* template_url_service); | 76 TemplateURLService* template_url_service); |
79 | 77 |
80 // Adds the history item in |row| to the index if it does not already | 78 // Adds the history item in |row| to the index if it does not already |
81 // exist and it meets the minimum 'quick' criteria. If the row already exists | 79 // exist and it meets the minimum 'quick' criteria. If the row already exists |
82 // in the index then the index will be updated if the row still meets the | 80 // in the index then the index will be updated if the row still meets the |
83 // criteria, otherwise the row will be removed from the index. Returns true | 81 // criteria, otherwise the row will be removed from the index. Returns true |
84 // if the index was actually updated. |languages| gives a list of language | 82 // if the index was actually updated. |scheme_whitelist| is used to filter |
85 // encodings by which the URLs and page titles are broken down into words and | 83 // non-qualifying schemes. |history_service| is used to schedule an update to |
86 // characters. |scheme_whitelist| is used to filter non-qualifying schemes. | 84 // the recent visits component of this URL's entry in the index. |
87 // |history_service| is used to schedule an update to the recent visits | |
88 // component of this URL's entry in the index. | |
89 bool UpdateURL(history::HistoryService* history_service, | 85 bool UpdateURL(history::HistoryService* history_service, |
90 const history::URLRow& row, | 86 const history::URLRow& row, |
91 const std::string& languages, | |
92 const std::set<std::string>& scheme_whitelist, | 87 const std::set<std::string>& scheme_whitelist, |
93 base::CancelableTaskTracker* tracker); | 88 base::CancelableTaskTracker* tracker); |
94 | 89 |
95 // Updates the entry for |url_id| in the index, replacing its | 90 // Updates the entry for |url_id| in the index, replacing its |
96 // recent visits information with |recent_visits|. If |url_id| | 91 // recent visits information with |recent_visits|. If |url_id| |
97 // is not in the index, does nothing. | 92 // is not in the index, does nothing. |
98 void UpdateRecentVisits(history::URLID url_id, | 93 void UpdateRecentVisits(history::URLID url_id, |
99 const history::VisitVector& recent_visits); | 94 const history::VisitVector& recent_visits); |
100 | 95 |
101 // Using |history_service| schedules an update (using the historyDB | 96 // Using |history_service| schedules an update (using the historyDB |
102 // thread) for the recent visits information for |url_id|. Unless | 97 // thread) for the recent visits information for |url_id|. Unless |
103 // something unexpectedly goes wrong, UpdateRecentVisits() should | 98 // something unexpectedly goes wrong, UpdateRecentVisits() should |
104 // eventually be called from a callback. | 99 // eventually be called from a callback. |
105 void ScheduleUpdateRecentVisits(history::HistoryService* history_service, | 100 void ScheduleUpdateRecentVisits(history::HistoryService* history_service, |
106 history::URLID url_id, | 101 history::URLID url_id, |
107 base::CancelableTaskTracker* tracker); | 102 base::CancelableTaskTracker* tracker); |
108 | 103 |
109 // Deletes index data for the history item with the given |url|. | 104 // Deletes index data for the history item with the given |url|. |
110 // The item may not have actually been indexed, which is the case if it did | 105 // The item may not have actually been indexed, which is the case if it did |
111 // not previously meet minimum 'quick' criteria. Returns true if the index | 106 // not previously meet minimum 'quick' criteria. Returns true if the index |
112 // was actually updated. | 107 // was actually updated. |
113 bool DeleteURL(const GURL& url); | 108 bool DeleteURL(const GURL& url); |
114 | 109 |
115 // Constructs a new object by restoring its contents from the cache file | 110 // Constructs a new object by restoring its contents from the cache file |
116 // at |path|. Returns the new URLIndexPrivateData which on success will | 111 // at |path|. Returns the new URLIndexPrivateData which on success will |
117 // contain the restored data but upon failure will be empty. |languages| | 112 // contain the restored data but upon failure will be empty. |
118 // is used to break URLs and page titles into words. This function | 113 // This function should be run on the file thread. |
119 // should be run on the file thread. | |
120 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( | 114 static scoped_refptr<URLIndexPrivateData> RestoreFromFile( |
121 const base::FilePath& path, | 115 const base::FilePath& path); |
122 const std::string& languages); | |
123 | 116 |
124 // Constructs a new object by rebuilding its contents from the history | 117 // Constructs a new object by rebuilding its contents from the history |
125 // database in |history_db|. Returns the new URLIndexPrivateData which on | 118 // database in |history_db|. Returns the new URLIndexPrivateData which on |
126 // success will contain the rebuilt data but upon failure will be empty. | 119 // success will contain the rebuilt data but upon failure will be empty. |
127 // |languages| gives a list of language encodings by which the URLs and page | |
128 // titles are broken down into words and characters. | |
129 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( | 120 static scoped_refptr<URLIndexPrivateData> RebuildFromHistory( |
130 history::HistoryDatabase* history_db, | 121 history::HistoryDatabase* history_db, |
131 const std::string& languages, | |
132 const std::set<std::string>& scheme_whitelist); | 122 const std::set<std::string>& scheme_whitelist); |
133 | 123 |
134 // Writes |private_data| as a cache file to |file_path| and returns success. | 124 // Writes |private_data| as a cache file to |file_path| and returns success. |
135 static bool WritePrivateDataToCacheFileTask( | 125 static bool WritePrivateDataToCacheFileTask( |
136 scoped_refptr<URLIndexPrivateData> private_data, | 126 scoped_refptr<URLIndexPrivateData> private_data, |
137 const base::FilePath& file_path); | 127 const base::FilePath& file_path); |
138 | 128 |
139 // Creates a copy of ourself. | 129 // Creates a copy of ourself. |
140 scoped_refptr<URLIndexPrivateData> Duplicate() const; | 130 scoped_refptr<URLIndexPrivateData> Duplicate() const; |
141 | 131 |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
196 }; | 186 }; |
197 typedef std::map<base::string16, SearchTermCacheItem> SearchTermCacheMap; | 187 typedef std::map<base::string16, SearchTermCacheItem> SearchTermCacheMap; |
198 | 188 |
199 // A helper class which performs the final filter on each candidate | 189 // A helper class which performs the final filter on each candidate |
200 // history URL match, inserting accepted matches into |scored_matches_|. | 190 // history URL match, inserting accepted matches into |scored_matches_|. |
201 class AddHistoryMatch { | 191 class AddHistoryMatch { |
202 public: | 192 public: |
203 AddHistoryMatch(bookmarks::BookmarkModel* bookmark_model, | 193 AddHistoryMatch(bookmarks::BookmarkModel* bookmark_model, |
204 TemplateURLService* template_url_service, | 194 TemplateURLService* template_url_service, |
205 const URLIndexPrivateData& private_data, | 195 const URLIndexPrivateData& private_data, |
206 const std::string& languages, | |
207 const base::string16& lower_string, | 196 const base::string16& lower_string, |
208 const String16Vector& lower_terms, | 197 const String16Vector& lower_terms, |
209 const base::Time now); | 198 const base::Time now); |
210 AddHistoryMatch(const AddHistoryMatch& other); | 199 AddHistoryMatch(const AddHistoryMatch& other); |
211 ~AddHistoryMatch(); | 200 ~AddHistoryMatch(); |
212 | 201 |
213 void operator()(const HistoryID history_id); | 202 void operator()(const HistoryID history_id); |
214 | 203 |
215 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } | 204 ScoredHistoryMatches ScoredMatches() const { return scored_matches_; } |
216 | 205 |
217 private: | 206 private: |
218 friend class InMemoryURLIndexTest; | 207 friend class InMemoryURLIndexTest; |
219 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, AddHistoryMatch); | 208 FRIEND_TEST_ALL_PREFIXES(InMemoryURLIndexTest, AddHistoryMatch); |
220 bookmarks::BookmarkModel* bookmark_model_; | 209 bookmarks::BookmarkModel* bookmark_model_; |
221 TemplateURLService* template_url_service_; | 210 TemplateURLService* template_url_service_; |
222 const URLIndexPrivateData& private_data_; | 211 const URLIndexPrivateData& private_data_; |
223 const std::string& languages_; | |
224 ScoredHistoryMatches scored_matches_; | 212 ScoredHistoryMatches scored_matches_; |
225 const base::string16& lower_string_; | 213 const base::string16& lower_string_; |
226 const String16Vector& lower_terms_; | 214 const String16Vector& lower_terms_; |
227 WordStarts lower_terms_to_word_starts_offsets_; | 215 WordStarts lower_terms_to_word_starts_offsets_; |
228 const base::Time now_; | 216 const base::Time now_; |
229 }; | 217 }; |
230 | 218 |
231 // A helper predicate class used to filter excess history items when the | 219 // A helper predicate class used to filter excess history items when the |
232 // candidate results set is too large. | 220 // candidate results set is too large. |
233 class HistoryItemFactorGreater { | 221 class HistoryItemFactorGreater { |
(...skipping 14 matching lines...) Expand all Loading... |
248 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); | 236 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); |
249 | 237 |
250 // Helper function to HistoryIDSetFromWords which composes a set of history | 238 // Helper function to HistoryIDSetFromWords which composes a set of history |
251 // ids for the term given in |term|. | 239 // ids for the term given in |term|. |
252 HistoryIDSet HistoryIDsForTerm(const base::string16& term); | 240 HistoryIDSet HistoryIDsForTerm(const base::string16& term); |
253 | 241 |
254 // Given a set of Char16s, finds words containing those characters. | 242 // Given a set of Char16s, finds words containing those characters. |
255 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 243 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
256 | 244 |
257 // Indexes one URL history item as described by |row|. Returns true if the | 245 // Indexes one URL history item as described by |row|. Returns true if the |
258 // row was actually indexed. |languages| gives a list of language encodings by | 246 // row was actually indexed. |scheme_whitelist| is used to filter |
259 // which the URLs and page titles are broken down into words and characters. | 247 // non-qualifying schemes. If |history_db| is not NULL then this function |
260 // |scheme_whitelist| is used to filter non-qualifying schemes. If | 248 // uses the history database synchronously to get the URL's recent visits |
261 // |history_db| is not NULL then this function uses the history database | 249 // information. This mode should only be used on the historyDB thread. |
262 // synchronously to get the URL's recent visits information. This mode should | 250 // If |history_db| is NULL, then this function uses |history_service| to |
263 // only be used on the historyDB thread. If |history_db| is NULL, then | 251 // schedule a task on the historyDB thread to fetch and update the recent |
264 // this function uses |history_service| to schedule a task on the | 252 // visits information. |
265 // historyDB thread to fetch and update the recent visits | |
266 // information. | |
267 bool IndexRow(history::HistoryDatabase* history_db, | 253 bool IndexRow(history::HistoryDatabase* history_db, |
268 history::HistoryService* history_service, | 254 history::HistoryService* history_service, |
269 const history::URLRow& row, | 255 const history::URLRow& row, |
270 const std::string& languages, | |
271 const std::set<std::string>& scheme_whitelist, | 256 const std::set<std::string>& scheme_whitelist, |
272 base::CancelableTaskTracker* tracker); | 257 base::CancelableTaskTracker* tracker); |
273 | 258 |
274 // Parses and indexes the words in the URL and page title of |row| and | 259 // Parses and indexes the words in the URL and page title of |row| and |
275 // calculates the word starts in each, saving the starts in |word_starts|. | 260 // calculates the word starts in each, saving the starts in |word_starts|. |
276 // |languages| gives a list of language encodings by which the URLs and page | |
277 // titles are broken down into words and characters. | |
278 void AddRowWordsToIndex(const history::URLRow& row, | 261 void AddRowWordsToIndex(const history::URLRow& row, |
279 RowWordStarts* word_starts, | 262 RowWordStarts* word_starts); |
280 const std::string& languages); | |
281 | 263 |
282 // Given a single word in |uni_word|, adds a reference for the containing | 264 // Given a single word in |uni_word|, adds a reference for the containing |
283 // history item identified by |history_id| to the index. | 265 // history item identified by |history_id| to the index. |
284 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); | 266 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); |
285 | 267 |
286 // Creates a new entry in the word/history map for |word_id| and adds | 269 // Creates a new entry in the word/history map for |word_id| and adds |
287 // |history_id| as the initial element of the word's set. | 269 // |history_id| as the initial element of the word's set. |
288 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); | 270 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); |
289 | 271 |
290 // Updates an existing entry in the word/history index by adding the | 272 // Updates an existing entry in the word/history index by adding the |
(...skipping 26 matching lines...) Expand all Loading... |
317 void SaveCharWordMap( | 299 void SaveCharWordMap( |
318 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 300 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
319 void SaveWordIDHistoryMap( | 301 void SaveWordIDHistoryMap( |
320 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 302 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
321 void SaveHistoryInfoMap( | 303 void SaveHistoryInfoMap( |
322 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 304 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
323 void SaveWordStartsMap( | 305 void SaveWordStartsMap( |
324 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; | 306 in_memory_url_index::InMemoryURLIndexCacheItem* cache) const; |
325 | 307 |
326 // Decode a data structure from the protobuf |cache|. Return false if there | 308 // Decode a data structure from the protobuf |cache|. Return false if there |
327 // is any kind of failure. |languages| will be used to break URLs and page | 309 // is any kind of failure. |
328 // titles into words | |
329 bool RestorePrivateData( | 310 bool RestorePrivateData( |
330 const in_memory_url_index::InMemoryURLIndexCacheItem& cache, | 311 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
331 const std::string& languages); | |
332 bool RestoreWordList( | 312 bool RestoreWordList( |
333 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 313 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
334 bool RestoreWordMap( | 314 bool RestoreWordMap( |
335 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 315 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
336 bool RestoreCharWordMap( | 316 bool RestoreCharWordMap( |
337 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 317 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
338 bool RestoreWordIDHistoryMap( | 318 bool RestoreWordIDHistoryMap( |
339 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 319 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
340 bool RestoreHistoryInfoMap( | 320 bool RestoreHistoryInfoMap( |
341 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); | 321 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
342 bool RestoreWordStartsMap( | 322 bool RestoreWordStartsMap( |
343 const in_memory_url_index::InMemoryURLIndexCacheItem& cache, | 323 const in_memory_url_index::InMemoryURLIndexCacheItem& cache); |
344 const std::string& languages); | |
345 | 324 |
346 // Determines if |gurl| has a whitelisted scheme and returns true if so. | 325 // Determines if |gurl| has a whitelisted scheme and returns true if so. |
347 static bool URLSchemeIsWhitelisted(const GURL& gurl, | 326 static bool URLSchemeIsWhitelisted(const GURL& gurl, |
348 const std::set<std::string>& whitelist); | 327 const std::set<std::string>& whitelist); |
349 | 328 |
350 // Cache of search terms. | 329 // Cache of search terms. |
351 SearchTermCacheMap search_term_cache_; | 330 SearchTermCacheMap search_term_cache_; |
352 | 331 |
353 // Start of data members that are cached ------------------------------------- | 332 // Start of data members that are cached ------------------------------------- |
354 | 333 |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
408 int saved_cache_version_; | 387 int saved_cache_version_; |
409 | 388 |
410 // Used for unit testing only. Records the number of candidate history items | 389 // Used for unit testing only. Records the number of candidate history items |
411 // at three stages in the index searching process. | 390 // at three stages in the index searching process. |
412 size_t pre_filter_item_count_; // After word index is queried. | 391 size_t pre_filter_item_count_; // After word index is queried. |
413 size_t post_filter_item_count_; // After trimming large result set. | 392 size_t post_filter_item_count_; // After trimming large result set. |
414 size_t post_scoring_item_count_; // After performing final filter/scoring. | 393 size_t post_scoring_item_count_; // After performing final filter/scoring. |
415 }; | 394 }; |
416 | 395 |
417 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 396 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |