components/omnibox/browser/url_index_private_data.h - Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types.

Side by Side Diff: components/omnibox/browser/url_index_private_data.h

Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types. (Closed)

Patch Set: Review, round 3. Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « components/omnibox/browser/in_memory_url_index_types.cc ('k') | components/omnibox/browser/url_index_private_data.cc » ('j') | components/omnibox/browser/url_index_private_data.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_	5 #ifndef COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_

6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_	6 #define COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_

7	7

8 #include <stddef.h>	8 #include <stddef.h>

9	9

10 #include <set>	10 #include <set>

	11 #include <stack>

11 #include <string>	12 #include <string>

12	13

13 #include "base/files/file_path.h"	14 #include "base/files/file_path.h"

14 #include "base/gtest_prod_util.h"	15 #include "base/gtest_prod_util.h"

15 #include "base/memory/ref_counted.h"	16 #include "base/memory/ref_counted.h"

16 #include "components/history/core/browser/history_service.h"	17 #include "components/history/core/browser/history_service.h"

17 #include "components/omnibox/browser/in_memory_url_index_cache.pb.h"	18 #include "components/omnibox/browser/in_memory_url_index_cache.pb.h"

18 #include "components/omnibox/browser/in_memory_url_index_types.h"	19 #include "components/omnibox/browser/in_memory_url_index_types.h"

19 #include "components/omnibox/browser/scored_history_match.h"	20 #include "components/omnibox/browser/scored_history_match.h"

20	21

(...skipping 176 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
197 ~HistoryItemFactorGreater();	198 ~HistoryItemFactorGreater();

198	199

199 bool operator()(const HistoryID h1, const HistoryID h2);	200 bool operator()(const HistoryID h1, const HistoryID h2);

200	201

201 private:	202 private:

202 const HistoryInfoMap& history_info_map_;	203 const HistoryInfoMap& history_info_map_;

203 };	204 };

204	205

205 // URL History indexing support functions.	206 // URL History indexing support functions.

206	207

207 // Composes a set of history item IDs by intersecting the set for each word	208 // Composes a vector of history item IDs by intersecting the set for each word

208 // in \|unsorted_words\|.	209 // in \|unsorted_words\|.

209 HistoryIDSet HistoryIDSetFromWords(const String16Vector& unsorted_words);	210 HistoryIDVector HistoryIDsFromWords(const String16Vector& unsorted_words);

	211

	212 // Trims the candidate pool in advance of doing proper substring searching, to

	213 // cap the cost of such searching. Discards the least-relevant items (based on

	214 // visit stats), which are least likely to score highly in the end. To

	215 // minimize the risk of discarding a valuable URL, the candidate pool is still

	216 // left two orders of magnitude larger than the final number of results

	217 // returned from the HQP.

	218 void TrimHistoryIdsPool(HistoryIDVector* history_ids) const;

210	219

211 // Helper function to HistoryIDSetFromWords which composes a set of history	220 // Helper function to HistoryIDSetFromWords which composes a set of history

212 // ids for the given term given in \|term\|.	221 // ids for the given term given in \|term\|.

213 HistoryIDSet HistoryIDsForTerm(const base::string16& term);	222 HistoryIDSet HistoryIDsForTerm(const base::string16& term);

214	223

215 // Given a set of Char16s, finds words containing those characters.	224 // Given a set of Char16s, finds words containing those characters.

216 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);	225 WordIDSet WordIDSetForTermChars(const Char16Set& term_chars);

217	226

218 // Helper function for HistoryItemsForTerms(). Fills in \|scored_items\| from	227 // Helper function for HistoryItemsForTerms(). Fills in \|scored_items\| from

219 // the matches listed in \|history_id_set\|.	228 // the matches listed in \|history_ids\|.

220 void HistoryIdSetToScoredMatches(	229 void HistoryIdsToScoredMatches(HistoryIDVector history_ids,

221 HistoryIDSet history_id_set,	230 const base::string16& lower_raw_string,

222 const base::string16& lower_raw_string,	231 const TemplateURLService* template_url_service,

223 const TemplateURLService* template_url_service,	232 bookmarks::BookmarkModel* bookmark_model,

224 bookmarks::BookmarkModel* bookmark_model,	233 ScoredHistoryMatches* scored_items) const;

225 ScoredHistoryMatches* scored_items) const;

226	234

227 // Fills in \|terms_to_word_starts_offsets\| according to where the word starts	235 // Fills in \|terms_to_word_starts_offsets\| according to where the word starts

228 // in each term. For example, in the term "-foo" the word starts at offset 1.	236 // in each term. For example, in the term "-foo" the word starts at offset 1.

229 static void CalculateWordStartsOffsets(	237 static void CalculateWordStartsOffsets(

230 const String16Vector& terms,	238 const String16Vector& terms,

231 WordStarts* terms_to_word_starts_offsets);	239 WordStarts* terms_to_word_starts_offsets);

232	240

233 // Indexes one URL history item as described by \|row\|. Returns true if the	241 // Indexes one URL history item as described by \|row\|. Returns true if the

234 // row was actually indexed. \|scheme_whitelist\| is used to filter	242 // row was actually indexed. \|scheme_whitelist\| is used to filter

235 // non-qualifying schemes. If \|history_db\| is not NULL then this function	243 // non-qualifying schemes. If \|history_db\| is not NULL then this function

(...skipping 10 matching lines...) Expand all Loading...
246	254

247 // Parses and indexes the words in the URL and page title of \|row\| and	255 // Parses and indexes the words in the URL and page title of \|row\| and

248 // calculate the word starts in each, saving the starts in \|word_starts\|.	256 // calculate the word starts in each, saving the starts in \|word_starts\|.

249 void AddRowWordsToIndex(const history::URLRow& row,	257 void AddRowWordsToIndex(const history::URLRow& row,

250 RowWordStarts* word_starts);	258 RowWordStarts* word_starts);

251	259

252 // Given a single word in \|uni_word\|, adds a reference for the containing	260 // Given a single word in \|uni_word\|, adds a reference for the containing

253 // history item identified by \|history_id\| to the index.	261 // history item identified by \|history_id\| to the index.

254 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id);	262 void AddWordToIndex(const base::string16& uni_word, HistoryID history_id);

255	263

256 // Creates a new entry in the word/history map for \|word_id\| and add	264 // Adds a new entry to word_list. Uses previously freed positions if
	Peter Kasting 2017/02/25 04:54:42 Nit: word_list -> \|word_list_\| Nit: word_list -> \|word_list_\| dyaroshev 2017/02/26 01:12:17 Done. Show quoted text On 2017/02/25 04:54:42, Peter Kasting wrote: > Nit: word_list -> \|word_list_\| Done.
257 // \|history_id\| as the initial element of the word's set.	265 // available.

258 void AddWordHistory(const base::string16& uni_word, HistoryID history_id);	266 WordID AddNewWordToWordList(const base::string16& term);

259

260 // Updates an existing entry in the word/history index by adding the

261 // \|history_id\| to set for \|word_id\| in the word_id_history_map_.

262 void UpdateWordHistory(WordID word_id, HistoryID history_id);

263

264 // Adds \|word_id\| to \|history_id\|'s entry in the history/word map,

265 // creating a new entry if one does not already exist.

266 void AddToHistoryIDWordMap(HistoryID history_id, WordID word_id);

267	267

268 // Removes \|row\| and all associated words and characters from the index.	268 // Removes \|row\| and all associated words and characters from the index.

269 void RemoveRowFromIndex(const history::URLRow& row);	269 void RemoveRowFromIndex(const history::URLRow& row);

270	270

271 // Removes all words and characters associated with \|row\| from the index.	271 // Removes all words and characters associated with \|row\| from the index.

272 void RemoveRowWordsFromIndex(const history::URLRow& row);	272 void RemoveRowWordsFromIndex(const history::URLRow& row);

273	273

274 // Clears \|used_\| for each item in the search term cache.	274 // Clears \|used_\| for each item in the search term cache.

275 void ResetSearchTermCache();	275 void ResetSearchTermCache();

276	276

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
337 // ID of the word in the word_map_. It reduces the memory overhead by	337 // ID of the word in the word_map_. It reduces the memory overhead by

338 // replacing a potentially long and repeated string with a simple index.	338 // replacing a potentially long and repeated string with a simple index.

339 String16Vector word_list_;	339 String16Vector word_list_;

340	340

341 // A list of available words slots in \|word_list_\|. An available word slot	341 // A list of available words slots in \|word_list_\|. An available word slot

342 // is the index of a unused word in word_list_ vector, also referred to as	342 // is the index of a unused word in word_list_ vector, also referred to as

343 // a WordID. As URL visits are added or modified new words may be added to	343 // a WordID. As URL visits are added or modified new words may be added to

344 // the index, in which case any available words are used, if any, and then	344 // the index, in which case any available words are used, if any, and then

345 // words are added to the end of the word_list_. When URL visits are	345 // words are added to the end of the word_list_. When URL visits are

346 // modified or deleted old words may be removed from the index, in which	346 // modified or deleted old words may be removed from the index, in which

347 // case the slots for those words are added to available_words_ for resuse	347 // case the slots for those words are added to available_words_ for resuse
	Alexander Yashkin 2017/02/21 07:07:15 reuse reuse dyaroshev 2017/02/26 01:12:17 Done. Show quoted text On 2017/02/21 07:07:15, Alexander Yashkin wrote: > reuse Done.
348 // by future URL updates.	348 // by future URL updates.

349 WordIDSet available_words_;	349 std::stack<WordID> available_words_;

350	350

351 // A one-to-one mapping from the a word string to its slot number (i.e.	351 // A one-to-one mapping from the a word string to its slot number (i.e.

352 // WordID) in the \|word_list_\|.	352 // WordID) in the \|word_list_\|.

353 WordMap word_map_;	353 WordMap word_map_;

354	354

355 // A one-to-many mapping from a single character to all WordIDs of words	355 // A one-to-many mapping from a single character to all WordIDs of words

356 // containing that character.	356 // containing that character.

357 CharWordIDMap char_word_map_;	357 CharWordIDMap char_word_map_;

358	358

359 // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as	359 // A one-to-many mapping from a WordID to all HistoryIDs (the row_id as

(...skipping 21 matching lines...) Expand all Loading...
381 int saved_cache_version_;	381 int saved_cache_version_;

382	382

383 // Used for unit testing only. Records the number of candidate history items	383 // Used for unit testing only. Records the number of candidate history items

384 // at three stages in the index searching process.	384 // at three stages in the index searching process.

385 size_t pre_filter_item_count_; // After word index is queried.	385 size_t pre_filter_item_count_; // After word index is queried.

386 size_t post_filter_item_count_; // After trimming large result set.	386 size_t post_filter_item_count_; // After trimming large result set.

387 size_t post_scoring_item_count_; // After performing final filter/scoring.	387 size_t post_scoring_item_count_; // After performing final filter/scoring.

388 };	388 };

389	389

390 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_	390 #endif // COMPONENTS_OMNIBOX_BROWSER_URL_INDEX_PRIVATE_DATA_H_

OLD	NEW