Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(41)

Side by Side Diff: components/omnibox/browser/url_index_private_data.h

Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types. (Closed)
Patch Set: Review, round 3. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_
6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ 6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_
7 7
8 #include <stddef.h> 8 #include <stddef.h>
9 9
10 #include <set> 10 #include <set>
11 #include <stack>
11 #include <string> 12 #include <string>
12 13
13 #include "base/files/file_path.h" 14 #include "base/files/file_path.h"
14 #include "base/gtest_prod_util.h" 15 #include "base/gtest_prod_util.h"
15 #include "base/memory/ref_counted.h" 16 #include "base/memory/ref_counted.h"
16 #include "components/history/core/browser/history_service.h" 17 #include "components/history/core/browser/history_service.h"
17 #include "components/omnibox/browser/in_memory_url_index_cache.pb.h" 18 #include "components/omnibox/browser/in_memory_url_index_cache.pb.h"
18 #include "components/omnibox/browser/in_memory_url_index_types.h" 19 #include "components/omnibox/browser/in_memory_url_index_types.h"
19 #include "components/omnibox/browser/scored_history_match.h" 20 #include "components/omnibox/browser/scored_history_match.h"
20 21
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
197 ~HistoryItemFactorGreater(); 198 ~HistoryItemFactorGreater();
198 199
199 bool operator()(const HistoryID h1, const HistoryID h2); 200 bool operator()(const HistoryID h1, const HistoryID h2);
200 201
201 private: 202 private:
202 const HistoryInfoMap& history_info_map_; 203 const HistoryInfoMap& history_info_map_;
203 }; 204 };
204 205
205 // URL History indexing support functions. 206 // URL History indexing support functions.
206 207
207 // Composes a set of history item IDs by intersecting the set for each word 208 // Composes a vector of history item IDs by intersecting the set for each word
208 // in |unsorted_words|. 209 // in |unsorted_words|.
209 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words); 210 HistoryIDVector HistoryIDsFromWords(const String16Vector& unsorted_words);
211
212 // Trims the candidate pool in advance of doing proper substring searching, to
213 // cap the cost of such searching. Discards the least-relevant items (based on
214 // visit stats), which are least likely to score highly in the end. To
215 // minimize the risk of discarding a valuable URL, the candidate pool is still
216 // left two orders of magnitude larger than the final number of results
217 // returned from the HQP.
218 void TrimHistoryIdsPool(HistoryIDVector* history_ids) const;
210 219
211 // Helper function to HistoryIDSetFromWords which composes a set of history 220 // Helper function to HistoryIDSetFromWords which composes a set of history
212 // ids for the given term given in |term|. 221 // ids for the given term given in |term|.
213 HistoryIDSet HistoryIDsForTerm(const base::string16& term); 222 HistoryIDSet HistoryIDsForTerm(const base::string16& term);
214 223
215 // Given a set of Char16s, finds words containing those characters. 224 // Given a set of Char16s, finds words containing those characters.
216 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars); 225 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);
217 226
218 // Helper function for HistoryItemsForTerms(). Fills in |scored_items| from 227 // Helper function for HistoryItemsForTerms(). Fills in |scored_items| from
219 // the matches listed in |history_id_set|. 228 // the matches listed in |history_ids|.
220 void HistoryIdSetToScoredMatches( 229 void HistoryIdsToScoredMatches(HistoryIDVector history_ids,
221 HistoryIDSet history_id_set, 230 const base::string16& lower_raw_string,
222 const base::string16& lower_raw_string, 231 const TemplateURLService* template_url_service,
223 const TemplateURLService* template_url_service, 232 bookmarks::BookmarkModel* bookmark_model,
224 bookmarks::BookmarkModel* bookmark_model, 233 ScoredHistoryMatches* scored_items) const;
225 ScoredHistoryMatches* scored_items) const;
226 234
227 // Fills in |terms_to_word_starts_offsets| according to where the word starts 235 // Fills in |terms_to_word_starts_offsets| according to where the word starts
228 // in each term. For example, in the term "-foo" the word starts at offset 1. 236 // in each term. For example, in the term "-foo" the word starts at offset 1.
229 static void CalculateWordStartsOffsets( 237 static void CalculateWordStartsOffsets(
230 const String16Vector& terms, 238 const String16Vector& terms,
231 WordStarts* terms_to_word_starts_offsets); 239 WordStarts* terms_to_word_starts_offsets);
232 240
233 // Indexes one URL history item as described by |row|. Returns true if the 241 // Indexes one URL history item as described by |row|. Returns true if the
234 // row was actually indexed. |scheme_whitelist| is used to filter 242 // row was actually indexed. |scheme_whitelist| is used to filter
235 // non-qualifying schemes. If |history_db| is not NULL then this function 243 // non-qualifying schemes. If |history_db| is not NULL then this function
(...skipping 10 matching lines...) Expand all
246 254
247 // Parses and indexes the words in the URL and page title of |row| and 255 // Parses and indexes the words in the URL and page title of |row| and
248 // calculate the word starts in each, saving the starts in |word_starts|. 256 // calculate the word starts in each, saving the starts in |word_starts|.
249 void AddRowWordsToIndex(const history::URLRow& row, 257 void AddRowWordsToIndex(const history::URLRow& row,
250 RowWordStarts* word_starts); 258 RowWordStarts* word_starts);
251 259
252 // Given a single word in |uni_word|, adds a reference for the containing 260 // Given a single word in |uni_word|, adds a reference for the containing
253 // history item identified by |history_id| to the index. 261 // history item identified by |history_id| to the index.
254 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id); 262 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id);
255 263
256 // Creates a new entry in the word/history map for |word_id| and add 264 // Adds a new entry to word_list. Uses previously freed positions if
Peter Kasting 2017/02/25 04:54:42 Nit: word_list -> |word_list_|
dyaroshev 2017/02/26 01:12:17 Done.
257 // |history_id| as the initial element of the word's set. 265 // available.
258 void AddWordHistory(const base::string16& uni_word, HistoryID history_id); 266 WordID AddNewWordToWordList(const base::string16& term);
259
260 // Updates an existing entry in the word/history index by adding the
261 // |history_id| to set for |word_id| in the word_id_history_map_.
262 void UpdateWordHistory(WordID word_id, HistoryID history_id);
263
264 // Adds |word_id| to |history_id|'s entry in the history/word map,
265 // creating a new entry if one does not already exist.
266 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id);
267 267
268 // Removes |row| and all associated words and characters from the index. 268 // Removes |row| and all associated words and characters from the index.
269 void RemoveRowFromIndex(const history::URLRow& row); 269 void RemoveRowFromIndex(const history::URLRow& row);
270 270
271 // Removes all words and characters associated with |row| from the index. 271 // Removes all words and characters associated with |row| from the index.
272 void RemoveRowWordsFromIndex(const history::URLRow& row); 272 void RemoveRowWordsFromIndex(const history::URLRow& row);
273 273
274 // Clears |used_| for each item in the search term cache. 274 // Clears |used_| for each item in the search term cache.
275 void ResetSearchTermCache(); 275 void ResetSearchTermCache();
276 276
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
337 // ID of the word in the word_map_. It reduces the memory overhead by 337 // ID of the word in the word_map_. It reduces the memory overhead by
338 // replacing a potentially long and repeated string with a simple index. 338 // replacing a potentially long and repeated string with a simple index.
339 String16Vector word_list_; 339 String16Vector word_list_;
340 340
341 // A list of available words slots in |word_list_|. An available word slot 341 // A list of available words slots in |word_list_|. An available word slot
342 // is the index of a unused word in word_list_ vector, also referred to as 342 // is the index of a unused word in word_list_ vector, also referred to as
343 // a WordID. As URL visits are added or modified new words may be added to 343 // a WordID. As URL visits are added or modified new words may be added to
344 // the index, in which case any available words are used, if any, and then 344 // the index, in which case any available words are used, if any, and then
345 // words are added to the end of the word_list_. When URL visits are 345 // words are added to the end of the word_list_. When URL visits are
346 // modified or deleted old words may be removed from the index, in which 346 // modified or deleted old words may be removed from the index, in which
347 // case the slots for those words are added to available_words_ for resuse 347 // case the slots for those words are added to available_words_ for resuse
Alexander Yashkin 2017/02/21 07:07:15 reuse
dyaroshev 2017/02/26 01:12:17 Done.
348 // by future URL updates. 348 // by future URL updates.
349 WordIDSet available_words_; 349 std::stack<WordID> available_words_;
350 350
351 // A one-to-one mapping from the a word string to its slot number (i.e. 351 // A one-to-one mapping from the a word string to its slot number (i.e.
352 // WordID) in the |word_list_|. 352 // WordID) in the |word_list_|.
353 WordMap word_map_; 353 WordMap word_map_;
354 354
355 // A one-to-many mapping from a single character to all WordIDs of words 355 // A one-to-many mapping from a single character to all WordIDs of words
356 // containing that character. 356 // containing that character.
357 CharWordIDMap char_word_map_; 357 CharWordIDMap char_word_map_;
358 358
359 // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as 359 // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as
(...skipping 21 matching lines...) Expand all
381 int saved_cache_version_; 381 int saved_cache_version_;
382 382
383 // Used for unit testing only. Records the number of candidate history items 383 // Used for unit testing only. Records the number of candidate history items
384 // at three stages in the index searching process. 384 // at three stages in the index searching process.
385 size_t pre_filter_item_count_; // After word index is queried. 385 size_t pre_filter_item_count_; // After word index is queried.
386 size_t post_filter_item_count_; // After trimming large result set. 386 size_t post_filter_item_count_; // After trimming large result set.
387 size_t post_scoring_item_count_; // After performing final filter/scoring. 387 size_t post_scoring_item_count_; // After performing final filter/scoring.
388 }; 388 };
389 389
390 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_ 390 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698