OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
7 | 7 |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include <set> | 10 #include <set> |
| 11 #include <stack> |
11 #include <string> | 12 #include <string> |
12 | 13 |
13 #include "base/files/file_path.h" | 14 #include "base/files/file_path.h" |
14 #include "base/gtest_prod_util.h" | 15 #include "base/gtest_prod_util.h" |
15 #include "base/memory/ref_counted.h" | 16 #include "base/memory/ref_counted.h" |
16 #include "components/history/core/browser/history_service.h" | 17 #include "components/history/core/browser/history_service.h" |
17 #include "components/omnibox/browser/in_memory_url_index_cache.pb.h" | 18 #include "components/omnibox/browser/in_memory_url_index_cache.pb.h" |
18 #include "components/omnibox/browser/in_memory_url_index_types.h" | 19 #include "components/omnibox/browser/in_memory_url_index_types.h" |
19 #include "components/omnibox/browser/scored_history_match.h" | 20 #include "components/omnibox/browser/scored_history_match.h" |
20 | 21 |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
198 ~HistoryItemFactorGreater(); | 199 ~HistoryItemFactorGreater(); |
199 | 200 |
200 bool operator()(const HistoryID h1, const HistoryID h2); | 201 bool operator()(const HistoryID h1, const HistoryID h2); |
201 | 202 |
202 private: | 203 private: |
203 const HistoryInfoMap& history_info_map_; | 204 const HistoryInfoMap& history_info_map_; |
204 }; | 205 }; |
205 | 206 |
206 // URL History indexing support functions. | 207 // URL History indexing support functions. |
207 | 208 |
208 // Composes a set of history item IDs by intersecting the set for each word | 209 // Composes a vector of history item IDs by intersecting the set for each word |
209 // in |unsorted_words|. | 210 // in |unsorted_words|. |
210 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); | 211 HistoryIDVector HistoryIDsFromWords(const String16Vector& unsorted_words); |
211 | 212 |
212 // Trims the candidate pool in advance of doing proper substring searching, to | 213 // Trims the candidate pool in advance of doing proper substring searching, to |
213 // cap the cost of such searching. Discards the least-relevant items (based on | 214 // cap the cost of such searching. Discards the least-relevant items (based on |
214 // visit stats), which are least likely to score highly in the end. To | 215 // visit stats), which are least likely to score highly in the end. To |
215 // minimize the risk of discarding a valuable URL, the candidate pool is still | 216 // minimize the risk of discarding a valuable URL, the candidate pool is still |
216 // left two orders of magnitude larger than the final number of results | 217 // left two orders of magnitude larger than the final number of results |
217 // returned from the HQP. Returns whether anything was trimmed. | 218 // returned from the HQP. Returns whether anything was trimmed. |
218 bool TrimHistoryIdsPool(HistoryIDSet* history_id_set) const; | 219 bool TrimHistoryIdsPool(HistoryIDVector* history_ids) const; |
219 | 220 |
 220 // Helper function to HistoryIDSetFromWords which composes a set of history | 221 // Helper function to HistoryIDsFromWords which composes a set of history |
 221 // ids for the given term given in |term|. | 222 // ids for the term given in |term|. |
222 HistoryIDSet HistoryIDsForTerm(const base::string16& term); | 223 HistoryIDSet HistoryIDsForTerm(const base::string16& term); |
223 | 224 |
224 // Given a set of Char16s, finds words containing those characters. | 225 // Given a set of Char16s, finds words containing those characters. |
225 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 226 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
226 | 227 |
227 // Helper function for HistoryItemsForTerms(). Fills in |scored_items| from | 228 // Helper function for HistoryItemsForTerms(). Fills in |scored_items| from |
228 // the matches listed in |history_id_set|. | 229 // the matches listed in |history_ids|. |
229 void HistoryIdSetToScoredMatches( | 230 void HistoryIdsToScoredMatches(HistoryIDVector history_ids, |
230 HistoryIDSet history_id_set, | 231 const base::string16& lower_raw_string, |
231 const base::string16& lower_raw_string, | 232 const TemplateURLService* template_url_service, |
232 const TemplateURLService* template_url_service, | 233 bookmarks::BookmarkModel* bookmark_model, |
233 bookmarks::BookmarkModel* bookmark_model, | 234 ScoredHistoryMatches* scored_items) const; |
234 ScoredHistoryMatches* scored_items) const; | |
235 | 235 |
236 // Fills in |terms_to_word_starts_offsets| according to where the word starts | 236 // Fills in |terms_to_word_starts_offsets| according to where the word starts |
237 // in each term. For example, in the term "-foo" the word starts at offset 1. | 237 // in each term. For example, in the term "-foo" the word starts at offset 1. |
238 static void CalculateWordStartsOffsets( | 238 static void CalculateWordStartsOffsets( |
239 const String16Vector& terms, | 239 const String16Vector& terms, |
240 WordStarts* terms_to_word_starts_offsets); | 240 WordStarts* terms_to_word_starts_offsets); |
241 | 241 |
242 // Indexes one URL history item as described by |row|. Returns true if the | 242 // Indexes one URL history item as described by |row|. Returns true if the |
243 // row was actually indexed. |scheme_whitelist| is used to filter | 243 // row was actually indexed. |scheme_whitelist| is used to filter |
244 // non-qualifying schemes. If |history_db| is not NULL then this function | 244 // non-qualifying schemes. If |history_db| is not NULL then this function |
(...skipping 10 matching lines...) Expand all Loading... |
255 | 255 |
256 // Parses and indexes the words in the URL and page title of |row| and | 256 // Parses and indexes the words in the URL and page title of |row| and |
 257 // calculate the word starts in each, saving the starts in |word_starts|. | 257 // calculates the word starts in each, saving the starts in |word_starts|. |
258 void AddRowWordsToIndex(const history::URLRow& row, | 258 void AddRowWordsToIndex(const history::URLRow& row, |
259 RowWordStarts* word_starts); | 259 RowWordStarts* word_starts); |
260 | 260 |
261 // Given a single word in |uni_word|, adds a reference for the containing | 261 // Given a single word in |uni_word|, adds a reference for the containing |
262 // history item identified by |history_id| to the index. | 262 // history item identified by |history_id| to the index. |
263 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); | 263 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); |
264 | 264 |
265 // Creates a new entry in the word/history map for |word_id| and add | 265 // Adds a new entry to |word_list_|. Uses previously freed positions if |
266 // |history_id| as the initial element of the word's set. | 266 // available. |
267 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); | 267 WordID AddNewWordToWordList(const base::string16& term); |
268 | |
269 // Updates an existing entry in the word/history index by adding the | |
270 // |history_id| to set for |word_id| in the word_id_history_map_. | |
271 void UpdateWordHistory(WordID word_id, HistoryID history_id); | |
272 | |
273 // Adds |word_id| to |history_id|'s entry in the history/word map, | |
274 // creating a new entry if one does not already exist. | |
275 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | |
276 | 268 |
277 // Removes |row| and all associated words and characters from the index. | 269 // Removes |row| and all associated words and characters from the index. |
278 void RemoveRowFromIndex(const history::URLRow& row); | 270 void RemoveRowFromIndex(const history::URLRow& row); |
279 | 271 |
280 // Removes all words and characters associated with |row| from the index. | 272 // Removes all words and characters associated with |row| from the index. |
281 void RemoveRowWordsFromIndex(const history::URLRow& row); | 273 void RemoveRowWordsFromIndex(const history::URLRow& row); |
282 | 274 |
283 // Clears |used_| for each item in the search term cache. | 275 // Clears |used_| for each item in the search term cache. |
284 void ResetSearchTermCache(); | 276 void ResetSearchTermCache(); |
285 | 277 |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
346 // ID of the word in the word_map_. It reduces the memory overhead by | 338 // ID of the word in the word_map_. It reduces the memory overhead by |
347 // replacing a potentially long and repeated string with a simple index. | 339 // replacing a potentially long and repeated string with a simple index. |
348 String16Vector word_list_; | 340 String16Vector word_list_; |
349 | 341 |
 350 // A list of available words slots in |word_list_|. An available word slot | 342 // A stack of available word slots in |word_list_|. An available word slot |
 351 // is the index of a unused word in word_list_ vector, also referred to as | 343 // is the index of an unused word in word_list_ vector, also referred to as |
352 // a WordID. As URL visits are added or modified new words may be added to | 344 // a WordID. As URL visits are added or modified new words may be added to |
353 // the index, in which case any available words are used, if any, and then | 345 // the index, in which case any available words are used, if any, and then |
354 // words are added to the end of the word_list_. When URL visits are | 346 // words are added to the end of the word_list_. When URL visits are |
355 // modified or deleted old words may be removed from the index, in which | 347 // modified or deleted old words may be removed from the index, in which |
356 // case the slots for those words are added to available_words_ for resuse | 348 // case the slots for those words are added to available_words_ for reuse |
357 // by future URL updates. | 349 // by future URL updates. |
358 WordIDSet available_words_; | 350 std::stack<WordID> available_words_; |
359 | 351 |
 360 // A one-to-one mapping from the a word string to its slot number (i.e. | 352 // A one-to-one mapping from a word string to its slot number (i.e. |
361 // WordID) in the |word_list_|. | 353 // WordID) in the |word_list_|. |
362 WordMap word_map_; | 354 WordMap word_map_; |
363 | 355 |
364 // A one-to-many mapping from a single character to all WordIDs of words | 356 // A one-to-many mapping from a single character to all WordIDs of words |
365 // containing that character. | 357 // containing that character. |
366 CharWordIDMap char_word_map_; | 358 CharWordIDMap char_word_map_; |
367 | 359 |
368 // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as | 360 // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as |
(...skipping 15 matching lines...) Expand all Loading... |
384 | 376 |
385 // End of data members that are cached --------------------------------------- | 377 // End of data members that are cached --------------------------------------- |
386 | 378 |
387 // For unit testing only. Specifies the version of the cache file to be saved. | 379 // For unit testing only. Specifies the version of the cache file to be saved. |
388 // Used only for testing upgrading of an older version of the cache upon | 380 // Used only for testing upgrading of an older version of the cache upon |
389 // restore. | 381 // restore. |
390 int saved_cache_version_; | 382 int saved_cache_version_; |
391 }; | 383 }; |
392 | 384 |
393 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 385 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |