OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
7 | 7 |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include <set> | 10 #include <set> |
11 #include <stack> | |
11 #include <string> | 12 #include <string> |
12 | 13 |
13 #include "base/files/file_path.h" | 14 #include "base/files/file_path.h" |
14 #include "base/gtest_prod_util.h" | 15 #include "base/gtest_prod_util.h" |
15 #include "base/memory/ref_counted.h" | 16 #include "base/memory/ref_counted.h" |
16 #include "components/history/core/browser/history_service.h" | 17 #include "components/history/core/browser/history_service.h" |
17 #include "components/omnibox/browser/in_memory_url_index_cache.pb.h" | 18 #include "components/omnibox/browser/in_memory_url_index_cache.pb.h" |
18 #include "components/omnibox/browser/in_memory_url_index_types.h" | 19 #include "components/omnibox/browser/in_memory_url_index_types.h" |
19 #include "components/omnibox/browser/scored_history_match.h" | 20 #include "components/omnibox/browser/scored_history_match.h" |
20 | 21 |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
197 ~HistoryItemFactorGreater(); | 198 ~HistoryItemFactorGreater(); |
198 | 199 |
199 bool operator()(const HistoryID h1, const HistoryID h2); | 200 bool operator()(const HistoryID h1, const HistoryID h2); |
200 | 201 |
201 private: | 202 private: |
202 const HistoryInfoMap& history_info_map_; | 203 const HistoryInfoMap& history_info_map_; |
203 }; | 204 }; |
204 | 205 |
205 // URL History indexing support functions. | 206 // URL History indexing support functions. |
206 | 207 |
207 // Composes a set of history item IDs by intersecting the set for each word | 208 // Composes a vector of history item IDs by intersecting the set for each word |
208 // in |unsorted_words|. | 209 // in |unsorted_words|. |
209 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); | 210 HistoryIDVector HistoryIDsFromWords(const String16Vector& unsorted_words); |
211 | |
212 // Trims the candidate pool in advance of doing proper substring searching, to | |
213 // cap the cost of such searching. Discards the least-relevant items (based on | |
214 // visit stats), which are least likely to score highly in the end. To | |
215 // minimize the risk of discarding a valuable URL, the candidate pool is still | |
216 // left two orders of magnitude larger than the final number of results | |
217 // returned from the HQP. | |
218 void TrimHistoryIdsPool(HistoryIDVector* history_ids) const; | |
210 | 219 |
211 // Helper function to HistoryIDSetFromWords which composes a set of history | 220 // Helper function to HistoryIDSetFromWords which composes a set of history |
212 // ids for the given term given in |term|. | 221 // ids for the given term given in |term|. |
213 HistoryIDSet HistoryIDsForTerm(const base::string16& term); | 222 HistoryIDSet HistoryIDsForTerm(const base::string16& term); |
214 | 223 |
215 // Given a set of Char16s, finds words containing those characters. | 224 // Given a set of Char16s, finds words containing those characters. |
216 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); | 225 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); |
217 | 226 |
218 // Helper function for HistoryItemsForTerms(). Fills in |scored_items| from | 227 // Helper function for HistoryItemsForTerms(). Fills in |scored_items| from |
219 // the matches listed in |history_id_set|. | 228 // the matches listed in |history_ids|. |
220 void HistoryIdSetToScoredMatches( | 229 void HistoryIdsToScoredMatches(HistoryIDVector history_ids, |
221 HistoryIDSet history_id_set, | 230 const base::string16& lower_raw_string, |
222 const base::string16& lower_raw_string, | 231 const TemplateURLService* template_url_service, |
223 const TemplateURLService* template_url_service, | 232 bookmarks::BookmarkModel* bookmark_model, |
224 bookmarks::BookmarkModel* bookmark_model, | 233 ScoredHistoryMatches* scored_items) const; |
225 ScoredHistoryMatches* scored_items) const; | |
226 | 234 |
227 // Fills in |terms_to_word_starts_offsets| according to where the word starts | 235 // Fills in |terms_to_word_starts_offsets| according to where the word starts |
228 // in each term. For example, in the term "-foo" the word starts at offset 1. | 236 // in each term. For example, in the term "-foo" the word starts at offset 1. |
229 static void CalculateWordStartsOffsets( | 237 static void CalculateWordStartsOffsets( |
230 const String16Vector& terms, | 238 const String16Vector& terms, |
231 WordStarts* terms_to_word_starts_offsets); | 239 WordStarts* terms_to_word_starts_offsets); |
232 | 240 |
233 // Indexes one URL history item as described by |row|. Returns true if the | 241 // Indexes one URL history item as described by |row|. Returns true if the |
234 // row was actually indexed. |scheme_whitelist| is used to filter | 242 // row was actually indexed. |scheme_whitelist| is used to filter |
235 // non-qualifying schemes. If |history_db| is not NULL then this function | 243 // non-qualifying schemes. If |history_db| is not NULL then this function |
(...skipping 10 matching lines...) Expand all Loading... | |
246 | 254 |
247 // Parses and indexes the words in the URL and page title of |row| and | 255 // Parses and indexes the words in the URL and page title of |row| and |
248 // calculate the word starts in each, saving the starts in |word_starts|. | 256 // calculate the word starts in each, saving the starts in |word_starts|. |
249 void AddRowWordsToIndex(const history::URLRow& row, | 257 void AddRowWordsToIndex(const history::URLRow& row, |
250 RowWordStarts* word_starts); | 258 RowWordStarts* word_starts); |
251 | 259 |
252 // Given a single word in |uni_word|, adds a reference for the containing | 260 // Given a single word in |uni_word|, adds a reference for the containing |
253 // history item identified by |history_id| to the index. | 261 // history item identified by |history_id| to the index. |
254 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); | 262 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); |
255 | 263 |
256 // Creates a new entry in the word/history map for |word_id| and add | 264 // Adds a new entry to word_list. Uses previously freed positions if |
Peter Kasting
2017/02/25 04:54:42
Nit: word_list -> |word_list_|
dyaroshev
2017/02/26 01:12:17
Done.
| |
257 // |history_id| as the initial element of the word's set. | 265 // available. |
258 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); | 266 WordID AddNewWordToWordList(const base::string16& term); |
259 | |
260 // Updates an existing entry in the word/history index by adding the | |
261 // |history_id| to set for |word_id| in the word_id_history_map_. | |
262 void UpdateWordHistory(WordID word_id, HistoryID history_id); | |
263 | |
264 // Adds |word_id| to |history_id|'s entry in the history/word map, | |
265 // creating a new entry if one does not already exist. | |
266 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id); | |
267 | 267 |
268 // Removes |row| and all associated words and characters from the index. | 268 // Removes |row| and all associated words and characters from the index. |
269 void RemoveRowFromIndex(const history::URLRow& row); | 269 void RemoveRowFromIndex(const history::URLRow& row); |
270 | 270 |
271 // Removes all words and characters associated with |row| from the index. | 271 // Removes all words and characters associated with |row| from the index. |
272 void RemoveRowWordsFromIndex(const history::URLRow& row); | 272 void RemoveRowWordsFromIndex(const history::URLRow& row); |
273 | 273 |
274 // Clears |used_| for each item in the search term cache. | 274 // Clears |used_| for each item in the search term cache. |
275 void ResetSearchTermCache(); | 275 void ResetSearchTermCache(); |
276 | 276 |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
337 // ID of the word in the word_map_. It reduces the memory overhead by | 337 // ID of the word in the word_map_. It reduces the memory overhead by |
338 // replacing a potentially long and repeated string with a simple index. | 338 // replacing a potentially long and repeated string with a simple index. |
339 String16Vector word_list_; | 339 String16Vector word_list_; |
340 | 340 |
341 // A list of available words slots in |word_list_|. An available word slot | 341 // A list of available words slots in |word_list_|. An available word slot |
342 // is the index of a unused word in word_list_ vector, also referred to as | 342 // is the index of a unused word in word_list_ vector, also referred to as |
343 // a WordID. As URL visits are added or modified new words may be added to | 343 // a WordID. As URL visits are added or modified new words may be added to |
344 // the index, in which case any available words are used, if any, and then | 344 // the index, in which case any available words are used, if any, and then |
345 // words are added to the end of the word_list_. When URL visits are | 345 // words are added to the end of the word_list_. When URL visits are |
346 // modified or deleted old words may be removed from the index, in which | 346 // modified or deleted old words may be removed from the index, in which |
347 // case the slots for those words are added to available_words_ for resuse | 347 // case the slots for those words are added to available_words_ for resuse |
Alexander Yashkin
2017/02/21 07:07:15
reuse
dyaroshev
2017/02/26 01:12:17
Done.
| |
348 // by future URL updates. | 348 // by future URL updates. |
349 WordIDSet available_words_; | 349 std::stack<WordID> available_words_; |
350 | 350 |
351 // A one-to-one mapping from the a word string to its slot number (i.e. | 351 // A one-to-one mapping from the a word string to its slot number (i.e. |
352 // WordID) in the |word_list_|. | 352 // WordID) in the |word_list_|. |
353 WordMap word_map_; | 353 WordMap word_map_; |
354 | 354 |
355 // A one-to-many mapping from a single character to all WordIDs of words | 355 // A one-to-many mapping from a single character to all WordIDs of words |
356 // containing that character. | 356 // containing that character. |
357 CharWordIDMap char_word_map_; | 357 CharWordIDMap char_word_map_; |
358 | 358 |
359 // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as | 359 // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as |
(...skipping 21 matching lines...) Expand all Loading... | |
381 int saved_cache_version_; | 381 int saved_cache_version_; |
382 | 382 |
383 // Used for unit testing only. Records the number of candidate history items | 383 // Used for unit testing only. Records the number of candidate history items |
384 // at three stages in the index searching process. | 384 // at three stages in the index searching process. |
385 size_t pre_filter_item_count_; // After word index is queried. | 385 size_t pre_filter_item_count_; // After word index is queried. |
386 size_t post_filter_item_count_; // After trimming large result set. | 386 size_t post_filter_item_count_; // After trimming large result set. |
387 size_t post_scoring_item_count_; // After performing final filter/scoring. | 387 size_t post_scoring_item_count_; // After performing final filter/scoring. |
388 }; | 388 }; |
389 | 389 |
390 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ | 390 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ |
OLD | NEW |