OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <iterator> | 10 #include <iterator> |
(...skipping 19 matching lines...) | |
30 #include "components/omnibox/browser/in_memory_url_index.h" | 30 #include "components/omnibox/browser/in_memory_url_index.h" |
31 #include "components/search_engines/template_url_service.h" | 31 #include "components/search_engines/template_url_service.h" |
32 #include "components/url_formatter/url_formatter.h" | 32 #include "components/url_formatter/url_formatter.h" |
33 | 33 |
34 #if defined(USE_SYSTEM_PROTOBUF) | 34 #if defined(USE_SYSTEM_PROTOBUF) |
35 #include <google/protobuf/repeated_field.h> | 35 #include <google/protobuf/repeated_field.h> |
36 #else | 36 #else |
37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
38 #endif | 38 #endif |
39 | 39 |
40 namespace { | |
41 | |
40 using google::protobuf::RepeatedField; | 42 using google::protobuf::RepeatedField; |
41 using google::protobuf::RepeatedPtrField; | 43 using google::protobuf::RepeatedPtrField; |
42 using in_memory_url_index::InMemoryURLIndexCacheItem; | 44 using in_memory_url_index::InMemoryURLIndexCacheItem; |
43 | 45 |
44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem | 46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem |
45 WordListItem; | 47 WordListItem; |
46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry | 48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry |
47 WordMapEntry; | 49 WordMapEntry; |
48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; | 50 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; |
49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem | 51 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem |
(...skipping 20 matching lines...) | |
70 WordStartsMapEntry; | 72 WordStartsMapEntry; |
71 | 73 |
72 // Algorithm Functions --------------------------------------------------------- | 74 // Algorithm Functions --------------------------------------------------------- |
73 | 75 |
74 // Comparison function for sorting search terms by descending length. | 76 // Comparison function for sorting search terms by descending length. |
75 bool LengthGreater(const base::string16& string_a, | 77 bool LengthGreater(const base::string16& string_a, |
76 const base::string16& string_b) { | 78 const base::string16& string_b) { |
77 return string_a.length() > string_b.length(); | 79 return string_a.length() > string_b.length(); |
78 } | 80 } |
79 | 81 |
82 template <typename ResultingSet, typename I, typename Transformation> | |
Peter Kasting
2017/02/18 01:46:31
Nit: I -> Iter?
dyaroshev
2017/02/18 11:48:13
Done.
| |
83 // Requires: ResultingSet is a Container, I is an InputIterator, Transformation | |
Peter Kasting
2017/02/18 01:46:31
Nit: template<...> is part of the function declara
dyaroshev
2017/02/18 11:48:14
This comment describes concepts. Used concepts her
Peter Kasting
2017/02/20 10:02:54
You can leave them if you want, but don't insert t
| |
84 // is a UnaryFunction on ValueType<I> that returns a Container. | |
85 ResultingSet IntersectSets(I first, I last, Transformation get_set) { | |
Peter Kasting
2017/02/18 01:46:31
Nit: GetTransformIntersection()? IntersectConstru
dyaroshev
2017/02/18 11:48:13
Done.
| |
86 if (first == last) | |
87 return ResultingSet(); | |
88 | |
89 auto tmp_set = get_set(*first); | |
90 ResultingSet res(tmp_set.begin(), tmp_set.end()); | |
dyaroshev
2017/02/18 01:22:14
If we keep this, I should write smth like Contain
dyaroshev
2017/02/18 11:48:14
Done.
@pkasting: there are number of places here w
Peter Kasting
2017/02/20 10:02:54
I find things more readable without ContainerCast.
| |
91 | |
92 // Like std::accumulate, but short-circuits if the set is empty. | |
93 for (++first; first != last && !res.empty(); ++first) { | |
94 auto tmp_set = get_set(*first); | |
95 if (tmp_set.empty()) | |
96 break; | |
Peter Kasting
2017/02/18 01:46:31
Shouldn't this return the empty set instead of bre
dyaroshev
2017/02/18 11:48:14
Done.
One of the advantages of keeping algorithms
Peter Kasting
2017/02/20 10:02:54
If there's really a need to test them, they can be
| |
97 | |
98 res = base::STLSetIntersection<ResultingSet>(res, tmp_set); | |
99 } | |
100 | |
101 return res; | |
102 } | |
103 | |
104 } // namespace | |
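A self-contained sketch of the intersect-with-transform pattern that the new IntersectSets() helper introduces. It is not the Chromium helper itself: plain std::set_intersection stands in for base::STLSetIntersection, the index data is made up, and the empty-set case returns early (as the reviewer suggests) rather than breaking. Later in the file, HistoryIDsFromWords() and WordIDSetForTermChars() feed this helper with lambdas over HistoryIDsForTerm() and char_word_map_ respectively.

```cpp
#include <algorithm>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <string>

using WordID = int;
using WordIDSet = std::set<WordID>;

template <typename ResultingSet, typename Iter, typename Transformation>
ResultingSet IntersectSets(Iter first, Iter last, Transformation get_set) {
  if (first == last)
    return ResultingSet();

  auto seed = get_set(*first);
  ResultingSet result(seed.begin(), seed.end());

  // Like std::accumulate, but short-circuits as soon as the running
  // intersection (or the next set) becomes empty.
  for (++first; first != last && !result.empty(); ++first) {
    auto next = get_set(*first);
    if (next.empty())
      return ResultingSet();
    ResultingSet intersection;
    std::set_intersection(result.begin(), result.end(), next.begin(),
                          next.end(),
                          std::inserter(intersection, intersection.end()));
    result.swap(intersection);
  }
  return result;
}

int main() {
  // Hypothetical char -> word-ID index, standing in for char_word_map_.
  std::map<char, WordIDSet> char_word_map = {
      {'c', {1, 2, 3}}, {'o', {2, 3, 4}}, {'m', {3, 4, 5}}};
  const std::string term_chars = "com";

  WordIDSet ids = IntersectSets<WordIDSet>(
      term_chars.begin(), term_chars.end(), [&](char c) {
        auto it = char_word_map.find(c);
        return it == char_word_map.end() ? WordIDSet() : it->second;
      });

  for (WordID id : ids)
    std::cout << id << ' ';  // Prints: 3
  return 0;
}
```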
80 | 105 |
81 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- | 106 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- |
82 | 107 |
83 // HistoryDBTask used to update the recent visit data for a particular | 108 // HistoryDBTask used to update the recent visit data for a particular |
84 // row from the history database. | 109 // row from the history database. |
85 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { | 110 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { |
86 public: | 111 public: |
87 explicit UpdateRecentVisitsFromHistoryDBTask( | 112 explicit UpdateRecentVisitsFromHistoryDBTask( |
88 URLIndexPrivateData* private_data, | 113 URLIndexPrivateData* private_data, |
89 history::URLID url_id); | 114 history::URLID url_id); |
(...skipping 97 matching lines...) | |
187 // the index we need individual, lower-cased words, ignoring escapings. For | 212 // the index we need individual, lower-cased words, ignoring escapings. For |
188 // the final filtering we need whitespace separated substrings possibly | 213 // the final filtering we need whitespace separated substrings possibly |
189 // containing escaped characters. | 214 // containing escaped characters. |
190 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); | 215 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
191 base::string16 lower_unescaped_string = net::UnescapeURLComponent( | 216 base::string16 lower_unescaped_string = net::UnescapeURLComponent( |
192 lower_raw_string, | 217 lower_raw_string, |
193 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | | 218 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
194 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); | 219 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
195 | 220 |
196 // Extract individual 'words' (as opposed to 'terms'; see comment in | 221 // Extract individual 'words' (as opposed to 'terms'; see comment in |
197 // HistoryIdSetToScoredMatches()) from the search string. When the user | 222 // HistoryIdsToScoredMatches()) from the search string. When the user |
198 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", | 223 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", |
199 // "mstone" and "release". | 224 // "mstone" and "release". |
200 String16Vector lower_words( | 225 String16Vector lower_words( |
201 String16VectorFromString16(lower_unescaped_string, false, nullptr)); | 226 String16VectorFromString16(lower_unescaped_string, false, nullptr)); |
202 if (lower_words.empty()) | 227 if (lower_words.empty()) |
203 continue; | 228 continue; |
204 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); | 229 |
205 pre_filter_item_count_ += history_id_set.size(); | 230 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); |
206 // Trim the candidate pool if it is large. Note that we do not filter out | 231 |
207 // items that do not contain the search terms as proper substrings -- | 232 pre_filter_item_count_ += history_ids.size(); |
208 // doing so is the performance-costly operation we are trying to avoid in | 233 TrimHistoryIdsPool(&history_ids); |
209 // order to maintain omnibox responsiveness. | 234 post_filter_item_count_ += history_ids.size(); |
Peter Kasting
2017/02/18 01:46:31
This line seems like a functional change (it was a
dyaroshev
2017/02/18 11:48:14
Moved this logic into TrimHistoryIdsPool and rever
Peter Kasting
2017/02/20 10:02:54
Ugh, don't do that. Now the code is as wrong as b
dyaroshev
2017/02/20 22:21:31
Other than creating a separate CL with this fix se
dyaroshev
2017/02/22 22:57:25
New CL: https://codereview.chromium.org/2702413007
| |
210 const size_t kItemsToScoreLimit = 500; | 235 |
211 if (history_id_set.size() > kItemsToScoreLimit) { | |
212 HistoryIDVector history_ids; | |
213 std::copy(history_id_set.begin(), history_id_set.end(), | |
214 std::back_inserter(history_ids)); | |
215 // Trim down the set by sorting by typed-count, visit-count, and last | |
216 // visit. | |
217 HistoryItemFactorGreater item_factor_functor(history_info_map_); | |
218 std::partial_sort(history_ids.begin(), | |
219 history_ids.begin() + kItemsToScoreLimit, | |
220 history_ids.end(), item_factor_functor); | |
221 history_id_set.clear(); | |
222 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit, | |
223 std::inserter(history_id_set, history_id_set.end())); | |
224 post_filter_item_count_ += history_id_set.size(); | |
225 } else { | |
226 post_filter_item_count_ += pre_filter_item_count_; | |
227 } | |
228 ScoredHistoryMatches temp_scored_items; | 236 ScoredHistoryMatches temp_scored_items; |
229 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string, | 237 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, |
230 template_url_service, bookmark_model, | 238 template_url_service, bookmark_model, |
231 &temp_scored_items); | 239 &temp_scored_items); |
232 scored_items.insert(scored_items.end(), temp_scored_items.begin(), | 240 scored_items.insert(scored_items.end(), temp_scored_items.begin(), |
233 temp_scored_items.end()); | 241 temp_scored_items.end()); |
234 } | 242 } |
235 // Select and sort only the top |max_matches| results. | 243 // Select and sort only the top |max_matches| results. |
236 if (scored_items.size() > max_matches) { | 244 if (scored_items.size() > max_matches) { |
237 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, | 245 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, |
238 scored_items.end(), | 246 scored_items.end(), |
239 ScoredHistoryMatch::MatchScoreGreater); | 247 ScoredHistoryMatch::MatchScoreGreater); |
240 scored_items.resize(max_matches); | 248 scored_items.resize(max_matches); |
241 } else { | 249 } else { |
(...skipping 235 matching lines...) | |
477 // post_scoring_item_count_ | 485 // post_scoring_item_count_ |
478 } | 486 } |
479 | 487 |
480 bool URLIndexPrivateData::Empty() const { | 488 bool URLIndexPrivateData::Empty() const { |
481 return history_info_map_.empty(); | 489 return history_info_map_.empty(); |
482 } | 490 } |
483 | 491 |
484 void URLIndexPrivateData::Clear() { | 492 void URLIndexPrivateData::Clear() { |
485 last_time_rebuilt_from_history_ = base::Time(); | 493 last_time_rebuilt_from_history_ = base::Time(); |
486 word_list_.clear(); | 494 word_list_.clear(); |
487 available_words_.clear(); | 495 available_words_ = {}; |
488 word_map_.clear(); | 496 word_map_.clear(); |
489 char_word_map_.clear(); | 497 char_word_map_.clear(); |
490 word_id_history_map_.clear(); | 498 word_id_history_map_.clear(); |
491 history_id_word_map_.clear(); | 499 history_id_word_map_.clear(); |
492 history_info_map_.clear(); | 500 history_info_map_.clear(); |
493 word_starts_map_.clear(); | 501 word_starts_map_.clear(); |
494 } | 502 } |
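Clear() now resets available_words_ by assignment rather than by clear(), which together with the top()/pop()/push() calls further down suggests the member became a container adapter such as std::stack or std::priority_queue; adapters expose no clear(). A minimal sketch of that reset idiom, using a hypothetical min-priority queue of word IDs (the CL spells the reset as `= {}`; assigning a freshly constructed adapter is the equivalent, unambiguously portable form):

```cpp
#include <functional>
#include <queue>
#include <vector>

// Hypothetical free-list of recyclable word IDs, analogous to
// available_words_.
using WordID = unsigned;
using WordIDQueue =
    std::priority_queue<WordID, std::vector<WordID>, std::greater<WordID>>;

int main() {
  WordIDQueue available_words;
  available_words.push(7);
  available_words.push(3);

  // Container adapters expose no clear(); resetting is done by assigning a
  // freshly constructed adapter.
  available_words = WordIDQueue();
  return available_words.empty() ? 0 : 1;
}
```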
495 | 503 |
496 URLIndexPrivateData::~URLIndexPrivateData() {} | 504 URLIndexPrivateData::~URLIndexPrivateData() {} |
497 | 505 |
498 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( | 506 HistoryIDVector URLIndexPrivateData::HistoryIDsFromWords( |
499 const String16Vector& unsorted_words) { | 507 const String16Vector& unsorted_words) { |
508 // The name of the histogram wasn't changed to HistoryIdsFromWords, to keep | |
509 // previously recorded data valid. | |
Peter Kasting
2017/02/18 01:46:31
Should we worry about this? Maybe it's OK to just
dyaroshev
2017/02/18 11:48:14
Well, it's for you mostly - I don't have access to
Mark P
2017/02/24 00:34:16
Might as well keep the name of the histogram to ke
| |
500 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); | 510 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); |
501 // Break the terms down into individual terms (words), get the candidate | 511 // Break the terms down into individual terms (words), get the candidate |
502 // set for each term, and intersect each to get a final candidate list. | 512 // set for each term, and intersect each to get a final candidate list. |
503 // Note that a single 'term' from the user's perspective might be | 513 // Note that a single 'term' from the user's perspective might be |
504 // a string like "http://www.somewebsite.com" which, from our perspective, | 514 // a string like "http://www.somewebsite.com" which, from our perspective, |
505 // is four words: 'http', 'www', 'somewebsite', and 'com'. | 515 // is four words: 'http', 'www', 'somewebsite', and 'com'. |
506 HistoryIDSet history_id_set; | 516 HistoryIDVector history_ids; |
dyaroshev
2017/02/18 01:22:14
Unused variable.
dyaroshev
2017/02/18 11:48:14
Done.
| |
507 String16Vector words(unsorted_words); | 517 String16Vector words(unsorted_words); |
508 // Sort the words into the longest first as such are likely to narrow down | 518 // Sort the words into the longest first as such are likely to narrow down |
509 // the results quicker. Also, single character words are the most expensive | 519 // the results quicker. Also, single character words are the most expensive |
510 // to process so save them for last. | 520 // to process so save them for last. |
511 std::sort(words.begin(), words.end(), LengthGreater); | 521 std::sort(words.begin(), words.end(), LengthGreater); |
512 for (String16Vector::iterator iter = words.begin(); iter != words.end(); | 522 |
513 ++iter) { | 523 return IntersectSets<HistoryIDVector>( |
514 base::string16 uni_word = *iter; | 524 words.begin(), words.end(), |
515 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); | 525 [this](const base::string16& word) { return HistoryIDsForTerm(word); }); |
516 if (term_history_set.empty()) { | 526 } |
517 history_id_set.clear(); | 527 |
518 break; | 528 void URLIndexPrivateData::TrimHistoryIdsPool( |
519 } | 529 HistoryIDVector* history_ids) const { |
520 if (iter == words.begin()) { | 530 constexpr size_t kItemsToScoreLimit = 500; |
521 history_id_set.swap(term_history_set); | 531 |
Peter Kasting
2017/02/18 01:46:31
Nit: No blank line
dyaroshev
2017/02/18 11:48:14
Done.
| |
522 } else { | 532 if (history_ids->size() < kItemsToScoreLimit) |
523 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>( | 533 return; |
524 history_id_set, term_history_set); | 534 |
525 history_id_set.swap(new_history_id_set); | 535 // Trim down the set by sorting by typed-count, visit-count, and last |
526 } | 536 // visit. |
527 } | 537 auto new_end = history_ids->begin() + kItemsToScoreLimit; |
528 return history_id_set; | 538 HistoryItemFactorGreater item_factor_functor(history_info_map_); |
539 | |
540 std::nth_element(history_ids->begin(), new_end, history_ids->end(), | |
541 item_factor_functor); | |
542 history_ids->erase(new_end, history_ids->end()); | |
529 } | 543 } |
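A standalone illustration (made-up IDs, std::greater standing in for HistoryItemFactorGreater) of why std::nth_element suffices in TrimHistoryIdsPool(): only membership in the best kItemsToScoreLimit entries matters here, because the surviving IDs are scored and then ordered later by the partial_sort over scored_items.

```cpp
#include <algorithm>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

int main() {
  // Stand-in for history IDs ranked by a quality score (higher is better).
  std::vector<int> pool = {12, 3, 44, 7, 91, 25, 68, 5, 30, 77};
  constexpr size_t kItemsToScoreLimit = 4;

  if (pool.size() > kItemsToScoreLimit) {
    auto new_end = pool.begin() + kItemsToScoreLimit;
    // After nth_element, the best kItemsToScoreLimit elements (by the given
    // comparator) occupy [begin, new_end) in unspecified order; everything
    // weaker is at or beyond new_end and can be dropped.
    std::nth_element(pool.begin(), new_end, pool.end(), std::greater<int>());
    pool.erase(new_end, pool.end());
  }

  for (int id : pool)
    std::cout << id << ' ';  // Some permutation of 91 77 68 44.
  return 0;
}
```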
530 | 544 |
531 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( | 545 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( |
532 const base::string16& term) { | 546 const base::string16& term) { |
533 if (term.empty()) | 547 if (term.empty()) |
534 return HistoryIDSet(); | 548 return HistoryIDSet(); |
535 | 549 |
536 // TODO(mrossetti): Consider optimizing for very common terms such as | 550 // TODO(mrossetti): Consider optimizing for very common terms such as |
537 // 'http[s]', 'www', 'com', etc. Or collect the top 100 most frequently | 551 // 'http[s]', 'www', 'com', etc. Or collect the top 100 most frequently |
538 // occurring words in the user's searches. | 552 // occurring words in the user's searches. |
(...skipping 46 matching lines...) | |
585 | 599 |
586 // Reduce the word set with any leftover, unprocessed characters. | 600 // Reduce the word set with any leftover, unprocessed characters. |
587 if (!unique_chars.empty()) { | 601 if (!unique_chars.empty()) { |
588 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); | 602 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); |
589 // We might come up empty on the leftovers. | 603 // We might come up empty on the leftovers. |
590 if (leftover_set.empty()) { | 604 if (leftover_set.empty()) { |
591 search_term_cache_[term] = SearchTermCacheItem(); | 605 search_term_cache_[term] = SearchTermCacheItem(); |
592 return HistoryIDSet(); | 606 return HistoryIDSet(); |
593 } | 607 } |
594 // Or there may not have been a prefix from which to start. | 608 // Or there may not have been a prefix from which to start. |
595 if (prefix_chars.empty()) { | 609 word_id_set = prefix_chars.empty() ? std::move(leftover_set) |
596 word_id_set.swap(leftover_set); | 610 : base::STLSetIntersection<WordIDSet>( |
597 } else { | 611 word_id_set, leftover_set); |
598 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( | |
599 word_id_set, leftover_set); | |
600 word_id_set.swap(new_word_id_set); | |
601 } | |
602 } | 612 } |
603 | 613 |
604 // We must filter the word list because the resulting word set surely | 614 // We must filter the word list because the resulting word set surely |
605 // contains words which do not have the search term as a proper subset. | 615 // contains words which do not have the search term as a proper subset. |
606 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); | 616 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); |
607 word_set_iter != word_id_set.end(); ) { | 617 word_set_iter != word_id_set.end(); ) { |
608 if (word_list_[*word_set_iter].find(term) == base::string16::npos) | 618 if (word_list_[*word_set_iter].find(term) == base::string16::npos) |
609 word_set_iter = word_id_set.erase(word_set_iter); | 619 word_set_iter = word_id_set.erase(word_set_iter); |
610 else | 620 else |
611 ++word_set_iter; | 621 ++word_set_iter; |
612 } | 622 } |
613 } else { | 623 } else { |
614 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); | 624 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); |
615 } | 625 } |
616 | 626 |
617 // If any words resulted then we can compose a set of history IDs by unioning | 627 // If any words resulted then we can compose a set of history IDs by unioning |
618 // the sets from each word. | 628 // the sets from each word. |
619 HistoryIDSet history_id_set; | 629 HistoryIDSet history_id_set; |
620 if (!word_id_set.empty()) { | 630 if (!word_id_set.empty()) { |
621 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 631 for (WordID word_id : word_id_set) { |
622 word_id_iter != word_id_set.end(); ++word_id_iter) { | |
623 WordID word_id = *word_id_iter; | |
624 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); | 632 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); |
625 if (word_iter != word_id_history_map_.end()) { | 633 if (word_iter != word_id_history_map_.end()) { |
626 HistoryIDSet& word_history_id_set(word_iter->second); | 634 HistoryIDSet& word_history_id_set(word_iter->second); |
627 history_id_set.insert(word_history_id_set.begin(), | 635 history_id_set.insert(word_history_id_set.begin(), |
628 word_history_id_set.end()); | 636 word_history_id_set.end()); |
629 } | 637 } |
630 } | 638 } |
631 } | 639 } |
632 | 640 |
633 // Record a new cache entry for this word if the term is longer than | 641 // Record a new cache entry for this word if the term is longer than |
634 // a single character. | 642 // a single character. |
635 if (term_length > 1) | 643 if (term_length > 1) |
636 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); | 644 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); |
637 | 645 |
638 return history_id_set; | 646 return history_id_set; |
639 } | 647 } |
640 | 648 |
641 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( | 649 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( |
642 const Char16Set& term_chars) { | 650 const Char16Set& term_chars) { |
643 WordIDSet word_id_set; | 651 return IntersectSets<WordIDSet>(term_chars.begin(), term_chars.end(), |
644 for (Char16Set::const_iterator c_iter = term_chars.begin(); | 652 [&](base::char16 c) { |
645 c_iter != term_chars.end(); ++c_iter) { | 653 auto char_iter = char_word_map_.find(c); |
646 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); | 654 if (char_iter == char_word_map_.end()) |
647 if (char_iter == char_word_map_.end()) { | 655 return WordIDSet(); |
648 // A character was not found so there are no matching results: bail. | 656 return char_iter->second; |
Peter Kasting
2017/02/18 01:46:31
Nit: Shorter:
auto GetSet = [this](base::char16
dyaroshev
2017/02/18 11:48:13
Done.
| |
649 word_id_set.clear(); | 657 }); |
650 break; | |
651 } | |
652 WordIDSet& char_word_id_set(char_iter->second); | |
653 // It is possible for there to no longer be any words associated with | |
654 // a particular character. Give up in that case. | |
655 if (char_word_id_set.empty()) { | |
656 word_id_set.clear(); | |
657 break; | |
658 } | |
659 | |
660 if (c_iter == term_chars.begin()) { | |
661 // First character results becomes base set of results. | |
662 word_id_set = char_word_id_set; | |
663 } else { | |
664 // Subsequent character results get intersected in. | |
665 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( | |
666 word_id_set, char_word_id_set); | |
667 word_id_set.swap(new_word_id_set); | |
668 } | |
669 } | |
670 return word_id_set; | |
671 } | 658 } |
672 | 659 |
673 void URLIndexPrivateData::HistoryIdSetToScoredMatches( | 660 void URLIndexPrivateData::HistoryIdsToScoredMatches( |
674 HistoryIDSet history_id_set, | 661 HistoryIDVector history_ids, |
675 const base::string16& lower_raw_string, | 662 const base::string16& lower_raw_string, |
676 const TemplateURLService* template_url_service, | 663 const TemplateURLService* template_url_service, |
677 bookmarks::BookmarkModel* bookmark_model, | 664 bookmarks::BookmarkModel* bookmark_model, |
678 ScoredHistoryMatches* scored_items) const { | 665 ScoredHistoryMatches* scored_items) const { |
679 if (history_id_set.empty()) | 666 if (history_ids.empty()) |
680 return; | 667 return; |
681 | 668 |
682 // Break up the raw search string (complete with escaped URL elements) into | 669 // Break up the raw search string (complete with escaped URL elements) into |
683 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). | 670 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). |
684 // We only want to break up the search string on 'true' whitespace rather than | 671 // We only want to break up the search string on 'true' whitespace rather than |
685 // escaped whitespace. For example, when the user types | 672 // escaped whitespace. For example, when the user types |
686 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and | 673 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and |
687 // "release". | 674 // "release". |
688 String16Vector lower_raw_terms = | 675 String16Vector lower_raw_terms = |
689 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, | 676 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, |
690 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); | 677 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
691 | 678 |
692 // Don't score matches when there are no terms to score against. (It's | 679 // Don't score matches when there are no terms to score against. (It's |
693 // possible that the word break iterator that extracts words to search for in | 680 // possible that the word break iterator that extracts words to search for in |
694 // the database allows some whitespace "words" whereas SplitString excludes a | 681 // the database allows some whitespace "words" whereas SplitString excludes a |
695 // long list of whitespace.) One could write a scoring function that gives a | 682 // long list of whitespace.) One could write a scoring function that gives a |
696 // reasonable order to matches when there are no terms (i.e., all the words | 683 // reasonable order to matches when there are no terms (i.e., all the words |
697 // are some form of whitespace), but this is such a rare edge case that it's | 684 // are some form of whitespace), but this is such a rare edge case that it's |
698 // not worth the time. | 685 // not worth the time. |
699 if (lower_raw_terms.empty()) | 686 if (lower_raw_terms.empty()) |
700 return; | 687 return; |
701 | 688 |
702 WordStarts lower_terms_to_word_starts_offsets; | 689 WordStarts lower_terms_to_word_starts_offsets; |
703 CalculateWordStartsOffsets(lower_raw_terms, | 690 CalculateWordStartsOffsets(lower_raw_terms, |
704 &lower_terms_to_word_starts_offsets); | 691 &lower_terms_to_word_starts_offsets); |
705 | 692 |
706 // Filter bad matches and other matches we don't want to display. | 693 // Filter bad matches and other matches we don't want to display. |
707 for (auto it = history_id_set.begin();;) { | 694 auto filter = [this, template_url_service](const HistoryID history_id) { |
708 it = std::find_if(it, history_id_set.end(), | 695 return ShouldFilter(history_id, template_url_service); |
709 [this, template_url_service](const HistoryID history_id) { | 696 }; |
710 return ShouldFilter(history_id, template_url_service); | 697 history_ids.erase( |
711 }); | 698 std::remove_if(history_ids.begin(), history_ids.end(), filter), |
712 if (it == history_id_set.end()) | 699 history_ids.end()); |
Peter Kasting
2017/02/18 01:46:31
Nit: Pulling history_ids.end() out into a temp |en
dyaroshev
2017/02/18 11:48:14
I don't like invalid iterators lying around and it
Peter Kasting
2017/02/20 10:02:54
That's a reasonable objection to have.
| |
713 break; | |
714 it = history_id_set.erase(it); | |
715 } | |
716 | 700 |
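The filtering above now uses the standard erase-remove idiom on the vector instead of erasing elements one by one from a set. A tiny self-contained example of the pattern, with a made-up predicate standing in for ShouldFilter():

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  using HistoryID = int64_t;
  std::vector<HistoryID> history_ids = {1, 2, 3, 4, 5, 6};

  // Stand-in for ShouldFilter(): drop even IDs.
  auto filter = [](HistoryID id) { return id % 2 == 0; };

  // remove_if compacts the kept elements to the front and returns the new
  // logical end; erase() then trims the leftover tail in one pass.
  history_ids.erase(
      std::remove_if(history_ids.begin(), history_ids.end(), filter),
      history_ids.end());

  for (HistoryID id : history_ids)
    std::cout << id << ' ';  // Prints: 1 3 5
  return 0;
}
```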
717 // Score the matches. | 701 // Score the matches. |
718 const size_t num_matches = history_id_set.size(); | 702 const size_t num_matches = history_ids.size(); |
719 const base::Time now = base::Time::Now(); | 703 const base::Time now = base::Time::Now(); |
720 std::transform( | 704 std::transform( |
721 history_id_set.begin(), history_id_set.end(), | 705 history_ids.begin(), history_ids.end(), std::back_inserter(*scored_items), |
722 std::back_inserter(*scored_items), [&](const HistoryID history_id) { | 706 [&](const HistoryID history_id) { |
723 auto hist_pos = history_info_map_.find(history_id); | 707 auto hist_pos = history_info_map_.find(history_id); |
724 const history::URLRow& hist_item = hist_pos->second.url_row; | 708 const history::URLRow& hist_item = hist_pos->second.url_row; |
725 auto starts_pos = word_starts_map_.find(history_id); | 709 auto starts_pos = word_starts_map_.find(history_id); |
726 DCHECK(starts_pos != word_starts_map_.end()); | 710 DCHECK(starts_pos != word_starts_map_.end()); |
727 return ScoredHistoryMatch( | 711 return ScoredHistoryMatch( |
728 hist_item, hist_pos->second.visits, lower_raw_string, | 712 hist_item, hist_pos->second.visits, lower_raw_string, |
729 lower_raw_terms, lower_terms_to_word_starts_offsets, | 713 lower_raw_terms, lower_terms_to_word_starts_offsets, |
730 starts_pos->second, | 714 starts_pos->second, |
731 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), | 715 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), |
732 num_matches, now); | 716 num_matches, now); |
(...skipping 90 matching lines...) | |
823 HistoryID history_id = static_cast<HistoryID>(row.id()); | 807 HistoryID history_id = static_cast<HistoryID>(row.id()); |
824 // Split URL into individual, unique words then add in the title words. | 808 // Split URL into individual, unique words then add in the title words. |
825 const GURL& gurl(row.url()); | 809 const GURL& gurl(row.url()); |
826 const base::string16& url = | 810 const base::string16& url = |
827 bookmarks::CleanUpUrlForMatching(gurl, nullptr); | 811 bookmarks::CleanUpUrlForMatching(gurl, nullptr); |
828 String16Set url_words = String16SetFromString16(url, | 812 String16Set url_words = String16SetFromString16(url, |
829 word_starts ? &word_starts->url_word_starts_ : nullptr); | 813 word_starts ? &word_starts->url_word_starts_ : nullptr); |
830 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); | 814 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); |
831 String16Set title_words = String16SetFromString16(title, | 815 String16Set title_words = String16SetFromString16(title, |
832 word_starts ? &word_starts->title_word_starts_ : nullptr); | 816 word_starts ? &word_starts->title_word_starts_ : nullptr); |
833 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); | 817 for (const auto& word : |
834 for (String16Set::iterator word_iter = words.begin(); | 818 base::STLSetUnion<String16Set>(url_words, title_words)) |
835 word_iter != words.end(); ++word_iter) | 819 AddWordToIndex(word, history_id); |
836 AddWordToIndex(*word_iter, history_id); | |
837 | 820 |
838 search_term_cache_.clear(); // Invalidate the term cache. | 821 search_term_cache_.clear(); // Invalidate the term cache. |
839 } | 822 } |
840 | 823 |
841 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, | 824 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, |
842 HistoryID history_id) { | 825 HistoryID history_id) { |
843 WordMap::iterator word_pos = word_map_.find(term); | 826 WordMap::iterator word_pos = word_map_.lower_bound(term); |
844 if (word_pos != word_map_.end()) | 827 |
845 UpdateWordHistory(word_pos->second, history_id); | 828 // Adding a new word (i.e. a word that is not already in the word index). |
846 else | 829 if (word_pos->first != term) { |
847 AddWordHistory(term, history_id); | 830 word_pos = |
831 word_map_.emplace_hint(word_pos, term, AddNewWordToWordList(term)); | |
832 | |
833 // For each character in the newly added word add the word to the character | |
834 // index. | |
835 for (base::char16 uni_char : Char16SetFromString16(term)) | |
836 char_word_map_[uni_char].insert(word_pos->second); | |
837 } | |
838 | |
839 DCHECK_EQ(word_pos->first, term); | |
840 | |
841 word_id_history_map_[word_pos->second].insert(history_id); | |
842 history_id_word_map_[history_id].insert(word_pos->second); | |
848 } | 843 } |
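A compact sketch of the find-or-insert-with-hint pattern that AddWordToIndex() now uses: a single lower_bound() lookup doubles as the existence check and as the hint for emplace_hint(). The names and data below are illustrative only, and the sketch also guards against lower_bound() returning end() before dereferencing:

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  using WordID = unsigned;
  std::map<std::string, WordID> word_map = {{"apple", 0}, {"cherry", 1}};
  const std::string term = "banana";

  auto word_pos = word_map.lower_bound(term);
  // Check end() before dereferencing, then test for an exact match.
  if (word_pos == word_map.end() || word_pos->first != term) {
    WordID new_id = static_cast<WordID>(word_map.size());
    // The hint points at the first element not less than |term|, so the
    // insertion happens in amortized constant time at the right spot.
    word_pos = word_map.emplace_hint(word_pos, term, new_id);
  }

  std::cout << term << " -> " << word_pos->second << '\n';  // banana -> 2
  return 0;
}
```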
849 | 844 |
850 void URLIndexPrivateData::AddWordHistory(const base::string16& term, | 845 WordID URLIndexPrivateData::AddNewWordToWordList(const base::string16& term) { |
851 HistoryID history_id) { | |
852 WordID word_id = word_list_.size(); | 846 WordID word_id = word_list_.size(); |
853 if (available_words_.empty()) { | 847 if (available_words_.empty()) { |
854 word_list_.push_back(term); | 848 word_list_.push_back(term); |
855 } else { | 849 return word_id; |
856 word_id = *(available_words_.begin()); | |
857 word_list_[word_id] = term; | |
858 available_words_.erase(word_id); | |
859 } | 850 } |
860 word_map_[term] = word_id; | |
861 | 851 |
862 HistoryIDSet history_id_set; | 852 word_id = available_words_.top(); |
863 history_id_set.insert(history_id); | 853 available_words_.pop(); |
864 word_id_history_map_[word_id] = history_id_set; | 854 return word_id; |
865 AddToHistoryIDWordMap(history_id, word_id); | |
866 | |
867 // For each character in the newly added word (i.e. a word that is not | |
868 // already in the word index), add the word to the character index. | |
869 Char16Set characters = Char16SetFromString16(term); | |
870 for (Char16Set::iterator uni_char_iter = characters.begin(); | |
871 uni_char_iter != characters.end(); ++uni_char_iter) { | |
872 base::char16 uni_char = *uni_char_iter; | |
873 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); | |
874 if (char_iter != char_word_map_.end()) { | |
875 // Update existing entry in the char/word index. | |
876 WordIDSet& word_id_set(char_iter->second); | |
877 word_id_set.insert(word_id); | |
878 } else { | |
879 // Create a new entry in the char/word index. | |
880 WordIDSet word_id_set; | |
881 word_id_set.insert(word_id); | |
882 char_word_map_[uni_char] = word_id_set; | |
883 } | |
884 } | |
885 } | |
886 | |
887 void URLIndexPrivateData::UpdateWordHistory(WordID word_id, | |
888 HistoryID history_id) { | |
889 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); | |
890 DCHECK(history_pos != word_id_history_map_.end()); | |
891 HistoryIDSet& history_id_set(history_pos->second); | |
892 history_id_set.insert(history_id); | |
893 AddToHistoryIDWordMap(history_id, word_id); | |
894 } | |
895 | |
896 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, | |
897 WordID word_id) { | |
898 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); | |
899 if (iter != history_id_word_map_.end()) { | |
900 WordIDSet& word_id_set(iter->second); | |
901 word_id_set.insert(word_id); | |
902 } else { | |
903 WordIDSet word_id_set; | |
904 word_id_set.insert(word_id); | |
905 history_id_word_map_[history_id] = word_id_set; | |
906 } | |
907 } | 855 } |
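An illustrative sketch of the slot recycling in AddNewWordToWordList(): freed word IDs are parked in available_words_ (the top()/pop()/push() calls suggest a priority queue or stack; a min-priority queue is assumed here) and handed out again before the word list is grown. For completeness the sketch also writes the term into the reused slot.

```cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <vector>

using WordID = size_t;
using WordIDQueue =
    std::priority_queue<WordID, std::vector<WordID>, std::greater<WordID>>;

WordID AddNewWord(const std::string& term,
                  std::vector<std::string>* word_list,
                  WordIDQueue* available_words) {
  if (available_words->empty()) {
    word_list->push_back(term);
    return word_list->size() - 1;
  }
  // Reuse the lowest free slot instead of growing the list.
  WordID word_id = available_words->top();
  available_words->pop();
  (*word_list)[word_id] = term;
  return word_id;
}

int main() {
  std::vector<std::string> word_list = {"alpha", "", "gamma"};  // Slot 1 freed.
  WordIDQueue available_words;
  available_words.push(1);

  std::cout << AddNewWord("beta", &word_list, &available_words) << '\n';   // 1
  std::cout << AddNewWord("delta", &word_list, &available_words) << '\n';  // 3
  return 0;
}
```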
908 | 856 |
909 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { | 857 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { |
910 RemoveRowWordsFromIndex(row); | 858 RemoveRowWordsFromIndex(row); |
911 HistoryID history_id = static_cast<HistoryID>(row.id()); | 859 HistoryID history_id = static_cast<HistoryID>(row.id()); |
912 history_info_map_.erase(history_id); | 860 history_info_map_.erase(history_id); |
913 word_starts_map_.erase(history_id); | 861 word_starts_map_.erase(history_id); |
914 } | 862 } |
915 | 863 |
916 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { | 864 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { |
917 // Remove the entries in history_id_word_map_ and word_id_history_map_ for | 865 // Remove the entries in history_id_word_map_ and word_id_history_map_ for |
918 // this row. | 866 // this row. |
919 HistoryID history_id = static_cast<HistoryID>(row.id()); | 867 HistoryID history_id = static_cast<HistoryID>(row.id()); |
920 WordIDSet word_id_set = history_id_word_map_[history_id]; | 868 WordIDSet word_id_set = history_id_word_map_[history_id]; |
921 history_id_word_map_.erase(history_id); | 869 history_id_word_map_.erase(history_id); |
922 | 870 |
923 // Reconcile any changes to word usage. | 871 // Reconcile any changes to word usage. |
924 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 872 for (WordID word_id : word_id_set) { |
925 word_id_iter != word_id_set.end(); ++word_id_iter) { | 873 auto word_id_history_map_iter = word_id_history_map_.find(word_id); |
926 WordID word_id = *word_id_iter; | 874 DCHECK(word_id_history_map_iter != word_id_history_map_.end()); |
927 word_id_history_map_[word_id].erase(history_id); | 875 |
928 if (!word_id_history_map_[word_id].empty()) | 876 word_id_history_map_iter->second.erase(history_id); |
929 continue; // The word is still in use. | 877 if (!word_id_history_map_iter->second.empty()) |
878 continue; | |
930 | 879 |
931 // The word is no longer in use. Reconcile any changes to character usage. | 880 // The word is no longer in use. Reconcile any changes to character usage. |
932 base::string16 word = word_list_[word_id]; | 881 base::string16 word = word_list_[word_id]; |
933 Char16Set characters = Char16SetFromString16(word); | 882 for (base::char16 uni_char : Char16SetFromString16(word)) { |
934 for (Char16Set::iterator uni_char_iter = characters.begin(); | 883 auto char_word_map_iter = char_word_map_.find(uni_char); |
935 uni_char_iter != characters.end(); ++uni_char_iter) { | 884 char_word_map_iter->second.erase(word_id); |
936 base::char16 uni_char = *uni_char_iter; | 885 if (char_word_map_iter->second.empty()) |
937 char_word_map_[uni_char].erase(word_id); | 886 char_word_map_.erase(char_word_map_iter); |
938 if (char_word_map_[uni_char].empty()) | |
939 char_word_map_.erase(uni_char); // No longer in use. | |
940 } | 887 } |
941 | 888 |
942 // Complete the removal of references to the word. | 889 // Complete the removal of references to the word. |
943 word_id_history_map_.erase(word_id); | 890 word_id_history_map_.erase(word_id_history_map_iter); |
944 word_map_.erase(word); | 891 word_map_.erase(word); |
945 word_list_[word_id] = base::string16(); | 892 word_list_[word_id] = base::string16(); |
946 available_words_.insert(word_id); | 893 available_words_.push(word_id); |
947 } | 894 } |
948 } | 895 } |
949 | 896 |
950 void URLIndexPrivateData::ResetSearchTermCache() { | 897 void URLIndexPrivateData::ResetSearchTermCache() { |
951 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); | 898 for (auto& item : search_term_cache_) |
952 iter != search_term_cache_.end(); ++iter) | 899 item.second.used_ = false; |
953 iter->second.used_ = false; | |
954 } | 900 } |
955 | 901 |
956 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { | 902 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { |
957 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 903 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
958 InMemoryURLIndexCacheItem index_cache; | 904 InMemoryURLIndexCacheItem index_cache; |
959 SavePrivateData(&index_cache); | 905 SavePrivateData(&index_cache); |
960 std::string data; | 906 std::string data; |
961 if (!index_cache.SerializeToString(&data)) { | 907 if (!index_cache.SerializeToString(&data)) { |
962 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; | 908 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; |
963 return false; | 909 return false; |
(...skipping 24 matching lines...) | |
988 SaveWordIDHistoryMap(cache); | 934 SaveWordIDHistoryMap(cache); |
989 SaveHistoryInfoMap(cache); | 935 SaveHistoryInfoMap(cache); |
990 SaveWordStartsMap(cache); | 936 SaveWordStartsMap(cache); |
991 } | 937 } |
992 | 938 |
993 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { | 939 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
994 if (word_list_.empty()) | 940 if (word_list_.empty()) |
995 return; | 941 return; |
996 WordListItem* list_item = cache->mutable_word_list(); | 942 WordListItem* list_item = cache->mutable_word_list(); |
997 list_item->set_word_count(word_list_.size()); | 943 list_item->set_word_count(word_list_.size()); |
998 for (String16Vector::const_iterator iter = word_list_.begin(); | 944 for (const base::string16& word : word_list_) |
999 iter != word_list_.end(); ++iter) | 945 list_item->add_word(base::UTF16ToUTF8(word)); |
1000 list_item->add_word(base::UTF16ToUTF8(*iter)); | |
1001 } | 946 } |
1002 | 947 |
1003 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { | 948 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { |
1004 if (word_map_.empty()) | 949 if (word_map_.empty()) |
1005 return; | 950 return; |
1006 WordMapItem* map_item = cache->mutable_word_map(); | 951 WordMapItem* map_item = cache->mutable_word_map(); |
1007 map_item->set_item_count(word_map_.size()); | 952 map_item->set_item_count(word_map_.size()); |
1008 for (WordMap::const_iterator iter = word_map_.begin(); | 953 for (const auto& elem : word_map_) { |
1009 iter != word_map_.end(); ++iter) { | |
1010 WordMapEntry* map_entry = map_item->add_word_map_entry(); | 954 WordMapEntry* map_entry = map_item->add_word_map_entry(); |
1011 map_entry->set_word(base::UTF16ToUTF8(iter->first)); | 955 map_entry->set_word(base::UTF16ToUTF8(elem.first)); |
1012 map_entry->set_word_id(iter->second); | 956 map_entry->set_word_id(elem.second); |
1013 } | 957 } |
1014 } | 958 } |
1015 | 959 |
1016 void URLIndexPrivateData::SaveCharWordMap( | 960 void URLIndexPrivateData::SaveCharWordMap( |
1017 InMemoryURLIndexCacheItem* cache) const { | 961 InMemoryURLIndexCacheItem* cache) const { |
1018 if (char_word_map_.empty()) | 962 if (char_word_map_.empty()) |
1019 return; | 963 return; |
1020 CharWordMapItem* map_item = cache->mutable_char_word_map(); | 964 CharWordMapItem* map_item = cache->mutable_char_word_map(); |
1021 map_item->set_item_count(char_word_map_.size()); | 965 map_item->set_item_count(char_word_map_.size()); |
1022 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); | 966 for (const auto& entry : char_word_map_) { |
1023 iter != char_word_map_.end(); ++iter) { | |
1024 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); | 967 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); |
1025 map_entry->set_char_16(iter->first); | 968 map_entry->set_char_16(entry.first); |
1026 const WordIDSet& word_id_set(iter->second); | 969 const WordIDSet& word_id_set(entry.second); |
1027 map_entry->set_item_count(word_id_set.size()); | 970 map_entry->set_item_count(word_id_set.size()); |
1028 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); | 971 for (WordID word_id : word_id_set) |
1029 set_iter != word_id_set.end(); ++set_iter) | 972 map_entry->add_word_id(word_id); |
1030 map_entry->add_word_id(*set_iter); | |
1031 } | 973 } |
1032 } | 974 } |
1033 | 975 |
1034 void URLIndexPrivateData::SaveWordIDHistoryMap( | 976 void URLIndexPrivateData::SaveWordIDHistoryMap( |
1035 InMemoryURLIndexCacheItem* cache) const { | 977 InMemoryURLIndexCacheItem* cache) const { |
1036 if (word_id_history_map_.empty()) | 978 if (word_id_history_map_.empty()) |
1037 return; | 979 return; |
1038 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); | 980 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); |
1039 map_item->set_item_count(word_id_history_map_.size()); | 981 map_item->set_item_count(word_id_history_map_.size()); |
1040 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); | 982 for (const auto& entry : word_id_history_map_) { |
1041 iter != word_id_history_map_.end(); ++iter) { | |
1042 WordIDHistoryMapEntry* map_entry = | 983 WordIDHistoryMapEntry* map_entry = |
1043 map_item->add_word_id_history_map_entry(); | 984 map_item->add_word_id_history_map_entry(); |
1044 map_entry->set_word_id(iter->first); | 985 map_entry->set_word_id(entry.first); |
1045 const HistoryIDSet& history_id_set(iter->second); | 986 const HistoryIDSet& history_id_set(entry.second); |
1046 map_entry->set_item_count(history_id_set.size()); | 987 map_entry->set_item_count(history_id_set.size()); |
1047 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); | 988 for (HistoryID history_id : history_id_set) |
1048 set_iter != history_id_set.end(); ++set_iter) | 989 map_entry->add_history_id(history_id); |
1049 map_entry->add_history_id(*set_iter); | |
1050 } | 990 } |
1051 } | 991 } |
1052 | 992 |
1053 void URLIndexPrivateData::SaveHistoryInfoMap( | 993 void URLIndexPrivateData::SaveHistoryInfoMap( |
1054 InMemoryURLIndexCacheItem* cache) const { | 994 InMemoryURLIndexCacheItem* cache) const { |
1055 if (history_info_map_.empty()) | 995 if (history_info_map_.empty()) |
1056 return; | 996 return; |
1057 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 997 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
1058 map_item->set_item_count(history_info_map_.size()); | 998 map_item->set_item_count(history_info_map_.size()); |
1059 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 999 for (const auto& entry : history_info_map_) { |
1060 iter != history_info_map_.end(); ++iter) { | |
1061 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 1000 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
1062 map_entry->set_history_id(iter->first); | 1001 map_entry->set_history_id(entry.first); |
1063 const history::URLRow& url_row(iter->second.url_row); | 1002 const history::URLRow& url_row(entry.second.url_row); |
1064 // Note: We only save information that contributes to the index so there | 1003 // Note: We only save information that contributes to the index so there |
1065 // is no need to save search_term_cache_ (not persistent). | 1004 // is no need to save search_term_cache_ (not persistent). |
1066 map_entry->set_visit_count(url_row.visit_count()); | 1005 map_entry->set_visit_count(url_row.visit_count()); |
1067 map_entry->set_typed_count(url_row.typed_count()); | 1006 map_entry->set_typed_count(url_row.typed_count()); |
1068 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1007 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1069 map_entry->set_url(url_row.url().spec()); | 1008 map_entry->set_url(url_row.url().spec()); |
1070 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); | 1009 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); |
1071 const VisitInfoVector& visits(iter->second.visits); | 1010 for (const auto& visit : entry.second.visits) { |
1072 for (VisitInfoVector::const_iterator visit_iter = visits.begin(); | |
1073 visit_iter != visits.end(); ++visit_iter) { | |
1074 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); | 1011 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); |
1075 visit_info->set_visit_time(visit_iter->first.ToInternalValue()); | 1012 visit_info->set_visit_time(visit.first.ToInternalValue()); |
1076 visit_info->set_transition_type(visit_iter->second); | 1013 visit_info->set_transition_type(visit.second); |
1077 } | 1014 } |
1078 } | 1015 } |
1079 } | 1016 } |
1080 | 1017 |
1081 void URLIndexPrivateData::SaveWordStartsMap( | 1018 void URLIndexPrivateData::SaveWordStartsMap( |
1082 InMemoryURLIndexCacheItem* cache) const { | 1019 InMemoryURLIndexCacheItem* cache) const { |
1083 if (word_starts_map_.empty()) | 1020 if (word_starts_map_.empty()) |
1084 return; | 1021 return; |
1085 // For unit testing: Enable saving of the cache as an earlier version to | 1022 // For unit testing: Enable saving of the cache as an earlier version to |
1086 // allow testing of cache file upgrading in ReadFromFile(). | 1023 // allow testing of cache file upgrading in ReadFromFile(). |
1087 // TODO(mrossetti): Instead of intruding on production code with this kind of | 1024 // TODO(mrossetti): Instead of intruding on production code with this kind of |
1088 // test harness, save a copy of an older version cache with known results. | 1025 // test harness, save a copy of an older version cache with known results. |
1089 // Implement this when switching the caching over to SQLite. | 1026 // Implement this when switching the caching over to SQLite. |
1090 if (saved_cache_version_ < 1) | 1027 if (saved_cache_version_ < 1) |
1091 return; | 1028 return; |
1092 | 1029 |
1093 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); | 1030 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); |
1094 map_item->set_item_count(word_starts_map_.size()); | 1031 map_item->set_item_count(word_starts_map_.size()); |
1095 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); | 1032 for (const auto& entry : word_starts_map_) { |
1096 iter != word_starts_map_.end(); ++iter) { | |
1097 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); | 1033 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); |
1098 map_entry->set_history_id(iter->first); | 1034 map_entry->set_history_id(entry.first); |
1099 const RowWordStarts& word_starts(iter->second); | 1035 const RowWordStarts& word_starts(entry.second); |
1100 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); | 1036 for (auto url_word_start : word_starts.url_word_starts_) |
1101 i != word_starts.url_word_starts_.end(); ++i) | 1037 map_entry->add_url_word_starts(url_word_start); |
1102 map_entry->add_url_word_starts(*i); | 1038 for (auto title_word_start : word_starts.title_word_starts_) |
1103 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); | 1039 map_entry->add_title_word_starts(title_word_start); |
1104 i != word_starts.title_word_starts_.end(); ++i) | |
1105 map_entry->add_title_word_starts(*i); | |
1106 } | 1040 } |
1107 } | 1041 } |
1108 | 1042 |
1109 bool URLIndexPrivateData::RestorePrivateData( | 1043 bool URLIndexPrivateData::RestorePrivateData( |
1110 const InMemoryURLIndexCacheItem& cache) { | 1044 const InMemoryURLIndexCacheItem& cache) { |
1111 last_time_rebuilt_from_history_ = | 1045 last_time_rebuilt_from_history_ = |
1112 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); | 1046 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); |
1113 const base::TimeDelta rebuilt_ago = | 1047 const base::TimeDelta rebuilt_ago = |
1114 base::Time::Now() - last_time_rebuilt_from_history_; | 1048 base::Time::Now() - last_time_rebuilt_from_history_; |
1115 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || | 1049 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || |
(...skipping 22 matching lines...) | |
1138 bool URLIndexPrivateData::RestoreWordList( | 1072 bool URLIndexPrivateData::RestoreWordList( |
1139 const InMemoryURLIndexCacheItem& cache) { | 1073 const InMemoryURLIndexCacheItem& cache) { |
1140 if (!cache.has_word_list()) | 1074 if (!cache.has_word_list()) |
1141 return false; | 1075 return false; |
1142 const WordListItem& list_item(cache.word_list()); | 1076 const WordListItem& list_item(cache.word_list()); |
1143 uint32_t expected_item_count = list_item.word_count(); | 1077 uint32_t expected_item_count = list_item.word_count(); |
1144 uint32_t actual_item_count = list_item.word_size(); | 1078 uint32_t actual_item_count = list_item.word_size(); |
1145 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1079 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1146 return false; | 1080 return false; |
1147 const RepeatedPtrField<std::string>& words(list_item.word()); | 1081 const RepeatedPtrField<std::string>& words(list_item.word()); |
1148 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); | 1082 word_list_.reserve(words.size()); |
1149 iter != words.end(); ++iter) | 1083 std::transform( |
1150 word_list_.push_back(base::UTF8ToUTF16(*iter)); | 1084 words.begin(), words.end(), std::back_inserter(word_list_), |
1085 [](const std::string& word) { return base::UTF8ToUTF16(word); }); | |
1151 return true; | 1086 return true; |
1152 } | 1087 } |
1153 | 1088 |
1154 bool URLIndexPrivateData::RestoreWordMap( | 1089 bool URLIndexPrivateData::RestoreWordMap( |
1155 const InMemoryURLIndexCacheItem& cache) { | 1090 const InMemoryURLIndexCacheItem& cache) { |
1156 if (!cache.has_word_map()) | 1091 if (!cache.has_word_map()) |
1157 return false; | 1092 return false; |
1158 const WordMapItem& list_item(cache.word_map()); | 1093 const WordMapItem& list_item(cache.word_map()); |
1159 uint32_t expected_item_count = list_item.item_count(); | 1094 uint32_t expected_item_count = list_item.item_count(); |
1160 uint32_t actual_item_count = list_item.word_map_entry_size(); | 1095 uint32_t actual_item_count = list_item.word_map_entry_size(); |
1161 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1096 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1162 return false; | 1097 return false; |
1163 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); | 1098 for (const auto& entry : list_item.word_map_entry()) |
1164 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); | 1099 word_map_[base::UTF8ToUTF16(entry.word())] = entry.word_id(); |
1165 iter != entries.end(); ++iter) | 1100 |
1166 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id(); | |
1167 return true; | 1101 return true; |
1168 } | 1102 } |
1169 | 1103 |
1170 bool URLIndexPrivateData::RestoreCharWordMap( | 1104 bool URLIndexPrivateData::RestoreCharWordMap( |
1171 const InMemoryURLIndexCacheItem& cache) { | 1105 const InMemoryURLIndexCacheItem& cache) { |
1172 if (!cache.has_char_word_map()) | 1106 if (!cache.has_char_word_map()) |
1173 return false; | 1107 return false; |
1174 const CharWordMapItem& list_item(cache.char_word_map()); | 1108 const CharWordMapItem& list_item(cache.char_word_map()); |
1175 uint32_t expected_item_count = list_item.item_count(); | 1109 uint32_t expected_item_count = list_item.item_count(); |
1176 uint32_t actual_item_count = list_item.char_word_map_entry_size(); | 1110 uint32_t actual_item_count = list_item.char_word_map_entry_size(); |
1177 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1111 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1178 return false; | 1112 return false; |
1179 const RepeatedPtrField<CharWordMapEntry>& | 1113 |
1180 entries(list_item.char_word_map_entry()); | 1114 for (const auto& entry : list_item.char_word_map_entry()) { |
1181 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = | 1115 expected_item_count = entry.item_count(); |
1182 entries.begin(); iter != entries.end(); ++iter) { | 1116 actual_item_count = entry.word_id_size(); |
1183 expected_item_count = iter->item_count(); | |
1184 actual_item_count = iter->word_id_size(); | |
1185 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1117 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1186 return false; | 1118 return false; |
1187 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); | 1119 base::char16 uni_char = static_cast<base::char16>(entry.char_16()); |
1188 WordIDSet word_id_set; | 1120 const RepeatedField<int32_t>& word_ids(entry.word_id()); |
1189 const RepeatedField<int32_t>& word_ids(iter->word_id()); | 1121 char_word_map_[uni_char] = {word_ids.begin(), word_ids.end()}; |
1190 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin(); | |
1191 jiter != word_ids.end(); ++jiter) | |
1192 word_id_set.insert(*jiter); | |
1193 char_word_map_[uni_char] = word_id_set; | |
1194 } | 1122 } |
1195 return true; | 1123 return true; |
1196 } | 1124 } |
1197 | 1125 |
1198 bool URLIndexPrivateData::RestoreWordIDHistoryMap( | 1126 bool URLIndexPrivateData::RestoreWordIDHistoryMap( |
1199 const InMemoryURLIndexCacheItem& cache) { | 1127 const InMemoryURLIndexCacheItem& cache) { |
1200 if (!cache.has_word_id_history_map()) | 1128 if (!cache.has_word_id_history_map()) |
1201 return false; | 1129 return false; |
1202 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); | 1130 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); |
1203 uint32_t expected_item_count = list_item.item_count(); | 1131 uint32_t expected_item_count = list_item.item_count(); |
1204 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); | 1132 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); |
1205 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1133 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1206 return false; | 1134 return false; |
1207 const RepeatedPtrField<WordIDHistoryMapEntry>& | 1135 for (const auto& entry : list_item.word_id_history_map_entry()) { |
1208 entries(list_item.word_id_history_map_entry()); | 1136 expected_item_count = entry.item_count(); |
1209 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = | 1137 actual_item_count = entry.history_id_size(); |
1210 entries.begin(); iter != entries.end(); ++iter) { | |
1211 expected_item_count = iter->item_count(); | |
1212 actual_item_count = iter->history_id_size(); | |
1213 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1138 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1214 return false; | 1139 return false; |
1215 WordID word_id = iter->word_id(); | 1140 WordID word_id = entry.word_id(); |
1216 HistoryIDSet history_id_set; | 1141 const RepeatedField<int64_t>& history_ids(entry.history_id()); |
1217 const RepeatedField<int64_t>& history_ids(iter->history_id()); | 1142 word_id_history_map_[word_id] = {history_ids.begin(), history_ids.end()}; |
1218 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin(); | 1143 for (HistoryID history_id : history_ids) |
1219 jiter != history_ids.end(); ++jiter) { | 1144 history_id_word_map_[history_id].insert(word_id); |
1220 history_id_set.insert(*jiter); | |
1221 AddToHistoryIDWordMap(*jiter, word_id); | |
1222 } | |
1223 word_id_history_map_[word_id] = history_id_set; | |
1224 } | 1145 } |
1225 return true; | 1146 return true; |
1226 } | 1147 } |
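
The new loop replaces the AddToHistoryIDWordMap() helper with a direct insert into history_id_word_map_[history_id], leaning on the fact that map operator[] value-initializes the mapped set on first access. A small sketch of that behaviour, with plain std::map/std::set of integers standing in for the real HistoryIDWordMap types (an assumption, not the actual typedefs):

#include <cstdint>
#include <map>
#include <set>

int main() {
  std::map<int64_t, std::set<int>> history_id_word_map;
  // First access to key 42 default-constructs an empty set, so there is no
  // separate "find or create" step before the insert.
  history_id_word_map[42].insert(7);
  history_id_word_map[42].insert(9);  // Reuses the existing set.
  return 0;
}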
1227 | 1148 |
1228 bool URLIndexPrivateData::RestoreHistoryInfoMap( | 1149 bool URLIndexPrivateData::RestoreHistoryInfoMap( |
1229 const InMemoryURLIndexCacheItem& cache) { | 1150 const InMemoryURLIndexCacheItem& cache) { |
1230 if (!cache.has_history_info_map()) | 1151 if (!cache.has_history_info_map()) |
1231 return false; | 1152 return false; |
1232 const HistoryInfoMapItem& list_item(cache.history_info_map()); | 1153 const HistoryInfoMapItem& list_item(cache.history_info_map()); |
1233 uint32_t expected_item_count = list_item.item_count(); | 1154 uint32_t expected_item_count = list_item.item_count(); |
1234 uint32_t actual_item_count = list_item.history_info_map_entry_size(); | 1155 uint32_t actual_item_count = list_item.history_info_map_entry_size(); |
1235 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1156 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1236 return false; | 1157 return false; |
1237 const RepeatedPtrField<HistoryInfoMapEntry>& | 1158 |
1238 entries(list_item.history_info_map_entry()); | 1159 for (const auto& entry : list_item.history_info_map_entry()) { |
1239 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = | 1160 HistoryID history_id = entry.history_id(); |
1240 entries.begin(); iter != entries.end(); ++iter) { | 1161 history::URLRow url_row(GURL(entry.url()), history_id); |
1241 HistoryID history_id = iter->history_id(); | 1162 url_row.set_visit_count(entry.visit_count()); |
1242 GURL url(iter->url()); | 1163 url_row.set_typed_count(entry.typed_count()); |
1243 history::URLRow url_row(url, history_id); | 1164 url_row.set_last_visit(base::Time::FromInternalValue(entry.last_visit())); |
1244 url_row.set_visit_count(iter->visit_count()); | 1165 if (entry.has_title()) |
1245 url_row.set_typed_count(iter->typed_count()); | 1166 url_row.set_title(base::UTF8ToUTF16(entry.title())); |
1246 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1167 history_info_map_[history_id].url_row = std::move(url_row); |
1247 if (iter->has_title()) { | |
1248 base::string16 title(base::UTF8ToUTF16(iter->title())); | |
1249 url_row.set_title(title); | |
1250 } | |
1251 history_info_map_[history_id].url_row = url_row; | |
1252 | 1168 |
1253 // Restore visits list. | 1169 // Restore visits list. |
1254 VisitInfoVector visits; | 1170 VisitInfoVector visits; |
1255 visits.reserve(iter->visits_size()); | 1171 visits.reserve(entry.visits_size()); |
1256 for (int i = 0; i < iter->visits_size(); ++i) { | 1172 for (const auto& entry_visit : entry.visits()) { |
1257 visits.push_back(std::make_pair( | 1173 visits.emplace_back( |
1258 base::Time::FromInternalValue(iter->visits(i).visit_time()), | 1174 base::Time::FromInternalValue(entry_visit.visit_time()), |
1259 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); | 1175 ui::PageTransitionFromInt(entry_visit.transition_type())); |
1260 } | 1176 } |
1261 history_info_map_[history_id].visits = visits; | 1177 history_info_map_[history_id].visits = std::move(visits); |
1262 } | 1178 } |
1263 return true; | 1179 return true; |
1264 } | 1180 } |
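
Two smaller changes above are worth calling out: emplace_back() constructs the (visit time, transition) pair in place instead of going through std::make_pair, and the finished vector is handed to history_info_map_ with std::move rather than copied. A self-contained sketch of both, using std::pair<int, int> as a stand-in for the real (base::Time, ui::PageTransition) element type:

#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, int>> visits;
  visits.reserve(2);
  // Arguments are forwarded straight to the pair constructor.
  visits.emplace_back(100, 1);
  visits.emplace_back(200, 2);
  // Transfers the buffer instead of copying element by element.
  std::vector<std::pair<int, int>> stored = std::move(visits);
  return 0;
}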
1265 | 1181 |
1266 bool URLIndexPrivateData::RestoreWordStartsMap( | 1182 bool URLIndexPrivateData::RestoreWordStartsMap( |
1267 const InMemoryURLIndexCacheItem& cache) { | 1183 const InMemoryURLIndexCacheItem& cache) { |
1268 // Note that this function must be called after RestoreHistoryInfoMap() has | 1184 // Note that this function must be called after RestoreHistoryInfoMap() has |
1269 // been run as the word starts may have to be recalculated from the urls and | 1185 // been run as the word starts may have to be recalculated from the urls and |
1270 // page titles. | 1186 // page titles. |
1271 if (cache.has_word_starts_map()) { | 1187 if (cache.has_word_starts_map()) { |
1272 const WordStartsMapItem& list_item(cache.word_starts_map()); | 1188 const WordStartsMapItem& list_item(cache.word_starts_map()); |
1273 uint32_t expected_item_count = list_item.item_count(); | 1189 uint32_t expected_item_count = list_item.item_count(); |
1274 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); | 1190 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); |
1275 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1191 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1276 return false; | 1192 return false; |
1277 const RepeatedPtrField<WordStartsMapEntry>& | 1193 for (const auto& entry : list_item.word_starts_map_entry()) { |
1278 entries(list_item.word_starts_map_entry()); | 1194 HistoryID history_id = entry.history_id(); |
1279 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter = | |
1280 entries.begin(); iter != entries.end(); ++iter) { | |
1281 HistoryID history_id = iter->history_id(); | |
1282 RowWordStarts word_starts; | 1195 RowWordStarts word_starts; |
1283 // Restore the URL word starts. | 1196 // Restore the URL word starts. |
1284 const RepeatedField<int32_t>& url_starts(iter->url_word_starts()); | 1197 const RepeatedField<int32_t>& url_starts(entry.url_word_starts()); |
1285 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin(); | 1198 word_starts.url_word_starts_ = {url_starts.begin(), url_starts.end()}; |
1286 jiter != url_starts.end(); ++jiter) | 1199 |
1287 word_starts.url_word_starts_.push_back(*jiter); | |
1288 // Restore the page title word starts. | 1200 // Restore the page title word starts. |
1289 const RepeatedField<int32_t>& title_starts(iter->title_word_starts()); | 1201 const RepeatedField<int32_t>& title_starts(entry.title_word_starts()); |
1290 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin(); | 1202 word_starts.title_word_starts_ = {title_starts.begin(), |
1291 jiter != title_starts.end(); ++jiter) | 1203 title_starts.end()}; |
1292 word_starts.title_word_starts_.push_back(*jiter); | 1204 |
1293 word_starts_map_[history_id] = word_starts; | 1205 word_starts_map_[history_id] = std::move(word_starts); |
1294 } | 1206 } |
1295 } else { | 1207 } else { |
1296 // Since the cache did not contain any word starts we must rebuild them from | 1208 // Since the cache did not contain any word starts we must rebuild them from |
1297 // the URLs and page titles. | 1209 // the URLs and page titles. |
1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1210 for (const auto& entry : history_info_map_) { |
1299 iter != history_info_map_.end(); ++iter) { | |
1300 RowWordStarts word_starts; | 1211 RowWordStarts word_starts; |
1301 const history::URLRow& row(iter->second.url_row); | 1212 const history::URLRow& row(entry.second.url_row); |
1302 const base::string16& url = | 1213 const base::string16& url = |
1303 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); | 1214 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); |
1304 String16VectorFromString16(url, false, &word_starts.url_word_starts_); | 1215 String16VectorFromString16(url, false, &word_starts.url_word_starts_); |
1305 const base::string16& title = | 1216 const base::string16& title = |
1306 bookmarks::CleanUpTitleForMatching(row.title()); | 1217 bookmarks::CleanUpTitleForMatching(row.title()); |
1307 String16VectorFromString16(title, false, &word_starts.title_word_starts_); | 1218 String16VectorFromString16(title, false, &word_starts.title_word_starts_); |
1308 word_starts_map_[iter->first] = word_starts; | 1219 word_starts_map_[entry.first] = std::move(word_starts); |
1309 } | 1220 } |
1310 } | 1221 } |
1311 return true; | 1222 return true; |
1312 } | 1223 } |
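
When the cache has no word-starts section, the fallback path above recomputes them from each row's cleaned-up URL and title; the word starts are the offsets at which words begin. A toy version of that computation, splitting on non-alphanumeric characters — the real logic lives in CleanUpUrlForMatching(), CleanUpTitleForMatching() and String16VectorFromString16(), so this is only an illustrative approximation:

#include <cctype>
#include <cstddef>
#include <string>
#include <vector>

std::vector<size_t> WordStarts(const std::string& text) {
  std::vector<size_t> starts;
  bool in_word = false;
  for (size_t i = 0; i < text.size(); ++i) {
    bool is_word_char =
        std::isalnum(static_cast<unsigned char>(text[i])) != 0;
    if (is_word_char && !in_word)
      starts.push_back(i);  // First character of a new word.
    in_word = is_word_char;
  }
  return starts;
}

// WordStarts("example.com/foo bar") returns {0, 8, 12, 16}.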
1313 | 1224 |
1314 // static | 1225 // static |
1315 bool URLIndexPrivateData::URLSchemeIsWhitelisted( | 1226 bool URLIndexPrivateData::URLSchemeIsWhitelisted( |
1316 const GURL& gurl, | 1227 const GURL& gurl, |
1317 const std::set<std::string>& whitelist) { | 1228 const std::set<std::string>& whitelist) { |
1318 return whitelist.find(gurl.scheme()) != whitelist.end(); | 1229 return whitelist.find(gurl.scheme()) != whitelist.end(); |
(...skipping 62 matching lines...) | |
1381 // First cut: typed count, visit count, recency. | 1292 // First cut: typed count, visit count, recency. |
1382 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1293 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
1383 // recently visited (within the last 12/24 hours) as highly important. Get | 1294 // recently visited (within the last 12/24 hours) as highly important. Get |
1384 // input from mpearson. | 1295 // input from mpearson. |
1385 if (r1.typed_count() != r2.typed_count()) | 1296 if (r1.typed_count() != r2.typed_count()) |
1386 return (r1.typed_count() > r2.typed_count()); | 1297 return (r1.typed_count() > r2.typed_count()); |
1387 if (r1.visit_count() != r2.visit_count()) | 1298 if (r1.visit_count() != r2.visit_count()) |
1388 return (r1.visit_count() > r2.visit_count()); | 1299 return (r1.visit_count() > r2.visit_count()); |
1389 return (r1.last_visit() > r2.last_visit()); | 1300 return (r1.last_visit() > r2.last_visit()); |
1390 } | 1301 } |
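
The tail of the ranking predicate above orders rows by typed count, then visit count, then recency, all descending. The same three-key ordering can be written more compactly with std::tie; the Row struct below is a stand-in for history::URLRow, not the real class:

#include <tuple>

struct Row {
  int typed_count;
  int visit_count;
  long long last_visit;
};

// True if r1 should rank above r2: higher typed count first, then higher
// visit count, then more recent last visit.
bool RankedHigher(const Row& r1, const Row& r2) {
  return std::tie(r1.typed_count, r1.visit_count, r1.last_visit) >
         std::tie(r2.typed_count, r2.visit_count, r2.last_visit);
}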