components/omnibox/browser/url_index_private_data.cc - Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types.

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types. (Closed)

Patch Set: Review, round 3. Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/omnibox/browser/url_index_private_data.h"	5 #include "components/omnibox/browser/url_index_private_data.h"

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <functional>	9 #include <functional>

10 #include <iterator>	10 #include <iterator>

(...skipping 19 matching lines...) Expand all Loading...
30 #include "components/omnibox/browser/in_memory_url_index.h"	30 #include "components/omnibox/browser/in_memory_url_index.h"

31 #include "components/search_engines/template_url_service.h"	31 #include "components/search_engines/template_url_service.h"

32 #include "components/url_formatter/url_formatter.h"	32 #include "components/url_formatter/url_formatter.h"

33	33

34 #if defined(USE_SYSTEM_PROTOBUF)	34 #if defined(USE_SYSTEM_PROTOBUF)

35 #include <google/protobuf/repeated_field.h>	35 #include <google/protobuf/repeated_field.h>

36 #else	36 #else

37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"	37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"

38 #endif	38 #endif

39	39

	40 namespace {

	41

40 using google::protobuf::RepeatedField;	42 using google::protobuf::RepeatedField;

41 using google::protobuf::RepeatedPtrField;	43 using google::protobuf::RepeatedPtrField;

42 using in_memory_url_index::InMemoryURLIndexCacheItem;	44 using in_memory_url_index::InMemoryURLIndexCacheItem;

43	45

44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem	46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem

45 WordListItem;	47 WordListItem;

46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry	48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry

47 WordMapEntry;	49 WordMapEntry;

48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;	50 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;

49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem	51 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem

(...skipping 12 matching lines...) Expand all Loading...
62 HistoryInfoMapEntry;	64 HistoryInfoMapEntry;

63 typedef in_memory_url_index::	65 typedef in_memory_url_index::

64 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo	66 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo

65 HistoryInfoMapEntry_VisitInfo;	67 HistoryInfoMapEntry_VisitInfo;

66 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordStartsMapItem	68 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordStartsMapItem

67 WordStartsMapItem;	69 WordStartsMapItem;

68 typedef in_memory_url_index::	70 typedef in_memory_url_index::

69 InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry	71 InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry

70 WordStartsMapEntry;	72 WordStartsMapEntry;

71	73

	74 // ContainerCast ---------------------------------------------------------------

	75 // A helper to Intersect algorithm that takes advantage of copy/move constructor

	76 // if the original and resulting sets type are the same.

	77

	78 // ContainerTo has a constructor from ContainerFrom.

	79 template <typename ContainerTo, typename ContainerFrom>

	80 ContainerTo ContainerCastImpl(ContainerFrom&& cont, std::true_type) {

	81 return ContainerTo(std::forward<ContainerFrom>(cont));

	82 }

	83

	84 // ContainerTo does not have a constructor from ContainerFrom.

	85 template <typename ContainerTo, typename ContainerFrom>

	86 ContainerTo ContainerCastImpl(const ContainerFrom& cont, std::false_type) {

	87 return ContainerTo(cont.begin(), cont.end());

	88 }

	89

	90 // Type traits to disable perfect forwarding.

	91 template <typename T>

	92 using rvalue_t = typename std::add_rvalue_reference<

	93 typename std::remove_reference<T>::type>::type;

	94

	95 // ContainerTo does not have a constructor from ContainerFrom.

	96 // Note: we have to use remove_reference to disable perfect forwarding.

	97 template <typename ContainerTo, typename ContainerFrom>

	98 ContainerTo ContainerCastImpl(rvalue_t<ContainerFrom> cont, std::false_type) {

	99 return ContainerTo(std::make_move_iterator(cont.begin()),

	100 std::make_move_iterator(cont.end()));

	101 }

	102

	103 template <typename ContainerTo, typename ContainerFrom>

	104 ContainerTo ContinerCast(ContainerFrom&& cont) {
	Alexander Yashkin 2017/02/21 07:07:15 ContainerCast? ContainerCast? dyaroshev 2017/02/26 01:12:17 Not applicable. Show quoted text On 2017/02/21 07:07:15, Alexander Yashkin wrote: > ContainerCast? Not applicable.
	105 return ContainerCastImpl<ContainerTo>(

	106 std::forward<ContainerFrom>(cont),

	107 std::is_convertible<ContainerTo, ContainerFrom>{});

	108 }

	109

72 // Algorithm Functions ---------------------------------------------------------	110 // Algorithm Functions ---------------------------------------------------------

73	111

74 // Comparison function for sorting search terms by descending length.	112 // Comparison function for sorting search terms by descending length.

75 bool LengthGreater(const base::string16& string_a,	113 bool LengthGreater(const base::string16& string_a,

76 const base::string16& string_b) {	114 const base::string16& string_b) {

77 return string_a.length() > string_b.length();	115 return string_a.length() > string_b.length();

78 }	116 }

79	117

	118 // Creates a new set by intersecting all the sets in range defined by \|fist\|,
	Alexander Yashkin 2017/02/21 07:07:15 Can you make this comment more human readable? Wha Can you make this comment more human readable? Whats the customization point? I am not sure every mere mortal knows whats it. Until I saw how this function is used I could not figure what it was doing. Maybe example would be useful? I would prefer this function to be inlined in both places where its used instead of having all the ContainerCast magic. Peter, Mark? dyaroshev 2017/02/22 22:57:25 Another good argument for not inlining this -> thi Show quoted text On 2017/02/21 07:07:15, Alexander Yashkin wrote: > Can you make this comment more human readable? > Whats the customization point? I am not sure every mere mortal knows whats it. > > Until I saw how this function is used I could not figure what it was doing. > Maybe example would be useful? > > I would prefer this function to be inlined in both places where its used instead > of having > all the ContainerCast magic. > > Peter, Mark? Another good argument for not inlining this -> this seems to be a bad way to implement multiple sets intersect (still better than the previous one) - it calls an allocator multiple times an does many unnecessary copies. A good way could grow out of smth like this: https://godbolt.org/g/V67d6R - it's definitely not inlinable. And if we put a method in an independent location, we could easily improve it later and affect all the places of use. dyaroshev 2017/02/23 00:45:54 Significantly revised version: https://godbolt.org Show quoted text On 2017/02/22 22:57:25, dyaroshev wrote: > On 2017/02/21 07:07:15, Alexander Yashkin wrote: > > Can you make this comment more human readable? > > Whats the customization point? I am not sure every mere mortal knows whats it. > > > > Until I saw how this function is used I could not figure what it was doing. > > Maybe example would be useful? > > > > I would prefer this function to be inlined in both places where its used > instead > > of having > > all the ContainerCast magic. > > > > Peter, Mark? > > Another good argument for not inlining this -> this seems to be a bad way to > implement multiple sets intersect (still better than the previous one) - it > calls an allocator multiple times an does many unnecessary copies. > > A good way could grow out of smth like this: https://godbolt.org/g/V67d6R - it's > definitely not inlinable. And if we put a method in an independent location, we > could easily improve it later and affect all the places of use. Significantly revised version: https://godbolt.org/g/vgCVmr
	119 // \|last\|. \|get_set\| is a customization point for getting the set from the

	120 // element of the range.

	121 template <typename ResultingSet, typename Iter, typename Projection>

	122 ResultingSet IntersectSetsWithProjection(Iter first,

	123 Iter last,

	124 Projection get_set) {

	125 if (first == last)

	126 return ResultingSet();

	127

	128 auto res = ContinerCast<ResultingSet>(get_set(*first));
	Peter Kasting 2017/02/25 04:54:42 It seems like the main reason you need ContainerCa It seems like the main reason you need ContainerCast is because one of the callers is trying to supply a vector type for ResultingSet. This sort of makes the type names and comments of this function a lie. They shouldn't explicitly say "set" if this is really just about working on arbitrary containers. But the other fix would be to intersect to a set, and then pull a vector out of that at the end to return from the caller that wants to do so. This would allow removing all the ContainerCast stuff, which still seems like a desirable goal to me in terms of lowering boilerplate and improving readability. It's not instantly obvious how it would affect efficiency, but it seems like it would lower it slightly (but maybe not enough to matter). dyaroshev 2017/02/26 01:12:16 Since this is the cleaning up patch and I don't se Show quoted text On 2017/02/25 04:54:42, Peter Kasting wrote: > It seems like the main reason you need ContainerCast is because one of the > callers is trying to supply a vector type for ResultingSet. > > This sort of makes the type names and comments of this function a lie. They > shouldn't explicitly say "set" if this is really just about working on arbitrary > containers. > > But the other fix would be to intersect to a set, and then pull a vector out of > that at the end to return from the caller that wants to do so. This would allow > removing all the ContainerCast stuff, which still seems like a desirable goal to > me in terms of lowering boilerplate and improving readability. It's not > instantly obvious how it would affect efficiency, but it seems like it would > lower it slightly (but maybe not enough to matter). Since this is the cleaning up patch and I don't see a 100% satisfying solution - I've reverted the intersect patch, created a task for writing a generic union/intersect functions and added a todo. Is this ok? Peter Kasting 2017/02/28 00:33:54 It's fine. Show quoted text On 2017/02/26 01:12:16, dyaroshev wrote: > On 2017/02/25 04:54:42, Peter Kasting wrote: > > It seems like the main reason you need ContainerCast is because one of the > > callers is trying to supply a vector type for ResultingSet. > > > > This sort of makes the type names and comments of this function a lie. They > > shouldn't explicitly say "set" if this is really just about working on > arbitrary > > containers. > > > > But the other fix would be to intersect to a set, and then pull a vector out > of > > that at the end to return from the caller that wants to do so. This would > allow > > removing all the ContainerCast stuff, which still seems like a desirable goal > to > > me in terms of lowering boilerplate and improving readability. It's not > > instantly obvious how it would affect efficiency, but it seems like it would > > lower it slightly (but maybe not enough to matter). > > Since this is the cleaning up patch and I don't see a 100% satisfying solution - > I've reverted the intersect patch, created a task for writing a generic > union/intersect functions and added a todo. Is this ok? It's fine.
	129

	130 // Like std::accumulate but short circuits if set is empty.

	131 for (++first; first != last && !res.empty(); ++first)

	132 res = base::STLSetIntersection<ResultingSet>(res, get_set(*first));

	133

	134 return res;

	135 }

	136

	137 } // namespace

80	138

81 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------	139 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------

82	140

83 // HistoryDBTask used to update the recent visit data for a particular	141 // HistoryDBTask used to update the recent visit data for a particular

84 // row from the history database.	142 // row from the history database.

85 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {	143 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {

86 public:	144 public:

87 explicit UpdateRecentVisitsFromHistoryDBTask(	145 explicit UpdateRecentVisitsFromHistoryDBTask(

88 URLIndexPrivateData* private_data,	146 URLIndexPrivateData* private_data,

89 history::URLID url_id);	147 history::URLID url_id);

(...skipping 97 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
187 // the index we need individual, lower-cased words, ignoring escapings. For	245 // the index we need individual, lower-cased words, ignoring escapings. For

188 // the final filtering we need whitespace separated substrings possibly	246 // the final filtering we need whitespace separated substrings possibly

189 // containing escaped characters.	247 // containing escaped characters.

190 base::string16 lower_raw_string(base::i18n::ToLower(search_string));	248 base::string16 lower_raw_string(base::i18n::ToLower(search_string));

191 base::string16 lower_unescaped_string = net::UnescapeURLComponent(	249 base::string16 lower_unescaped_string = net::UnescapeURLComponent(

192 lower_raw_string,	250 lower_raw_string,

193 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|	251 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|

194 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);	252 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

195	253

196 // Extract individual 'words' (as opposed to 'terms'; see comment in	254 // Extract individual 'words' (as opposed to 'terms'; see comment in

197 // HistoryIdSetToScoredMatches()) from the search string. When the user	255 // HistoryIdsToScoredMatches()) from the search string. When the user

198 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",	256 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",

199 // "mstone" and "release".	257 // "mstone" and "release".

200 String16Vector lower_words(	258 String16Vector lower_words(

201 String16VectorFromString16(lower_unescaped_string, false, nullptr));	259 String16VectorFromString16(lower_unescaped_string, false, nullptr));

202 if (lower_words.empty())	260 if (lower_words.empty())

203 continue;	261 continue;

204 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);	262

205 pre_filter_item_count_ += history_id_set.size();	263 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words);

206 // Trim the candidate pool if it is large. Note that we do not filter out	264

207 // items that do not contain the search terms as proper substrings --	265 pre_filter_item_count_ += history_ids.size();

208 // doing so is the performance-costly operation we are trying to avoid in	266 TrimHistoryIdsPool(&history_ids);

209 // order to maintain omnibox responsiveness.	267 post_filter_item_count_ += history_ids.size();

210 const size_t kItemsToScoreLimit = 500;	268

211 if (history_id_set.size() > kItemsToScoreLimit) {

212 HistoryIDVector history_ids;

213 std::copy(history_id_set.begin(), history_id_set.end(),

214 std::back_inserter(history_ids));

215 // Trim down the set by sorting by typed-count, visit-count, and last

216 // visit.

217 HistoryItemFactorGreater item_factor_functor(history_info_map_);

218 std::partial_sort(history_ids.begin(),

219 history_ids.begin() + kItemsToScoreLimit,

220 history_ids.end(), item_factor_functor);

221 history_id_set.clear();

222 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

223 std::inserter(history_id_set, history_id_set.end()));

224 post_filter_item_count_ += history_id_set.size();

225 } else {

226 post_filter_item_count_ += pre_filter_item_count_;

227 }

228 ScoredHistoryMatches temp_scored_items;	269 ScoredHistoryMatches temp_scored_items;

229 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string,	270 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string,

230 template_url_service, bookmark_model,	271 template_url_service, bookmark_model,

231 &temp_scored_items);	272 &temp_scored_items);

232 scored_items.insert(scored_items.end(), temp_scored_items.begin(),	273 scored_items.insert(scored_items.end(), temp_scored_items.begin(),

233 temp_scored_items.end());	274 temp_scored_items.end());

234 }	275 }

235 // Select and sort only the top \|max_matches\| results.	276 // Select and sort only the top \|max_matches\| results.

236 if (scored_items.size() > max_matches) {	277 if (scored_items.size() > max_matches) {

237 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,	278 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,

238 scored_items.end(),	279 scored_items.end(),

239 ScoredHistoryMatch::MatchScoreGreater);	280 ScoredHistoryMatch::MatchScoreGreater);

240 scored_items.resize(max_matches);	281 scored_items.resize(max_matches);

241 } else {	282 } else {

(...skipping 235 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
477 // post_scoring_item_count_	518 // post_scoring_item_count_

478 }	519 }

479	520

480 bool URLIndexPrivateData::Empty() const {	521 bool URLIndexPrivateData::Empty() const {

481 return history_info_map_.empty();	522 return history_info_map_.empty();

482 }	523 }

483	524

484 void URLIndexPrivateData::Clear() {	525 void URLIndexPrivateData::Clear() {

485 last_time_rebuilt_from_history_ = base::Time();	526 last_time_rebuilt_from_history_ = base::Time();

486 word_list_.clear();	527 word_list_.clear();

487 available_words_.clear();	528 available_words_ = {};

488 word_map_.clear();	529 word_map_.clear();

489 char_word_map_.clear();	530 char_word_map_.clear();

490 word_id_history_map_.clear();	531 word_id_history_map_.clear();

491 history_id_word_map_.clear();	532 history_id_word_map_.clear();

492 history_info_map_.clear();	533 history_info_map_.clear();

493 word_starts_map_.clear();	534 word_starts_map_.clear();

494 }	535 }

495	536

496 URLIndexPrivateData::~URLIndexPrivateData() {}	537 URLIndexPrivateData::~URLIndexPrivateData() {}

497	538

498 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(	539 HistoryIDVector URLIndexPrivateData::HistoryIDsFromWords(

499 const String16Vector& unsorted_words) {	540 const String16Vector& unsorted_words) {

	541 // The name of the histogram wasn't changed to HistoryIdsFromWords to preserve

	542 // old data valid.
	Peter Kasting 2017/02/25 04:54:41 Nit: Sounds like we're leaving the old name. If s Nit: Sounds like we're leaving the old name. If so I'd just say: "This histogram name reflects the historic name of this function." dyaroshev 2017/02/26 01:12:16 Done. Show quoted text On 2017/02/25 04:54:41, Peter Kasting wrote: > Nit: Sounds like we're leaving the old name. If so I'd just say: "This > histogram name reflects the historic name of this function." Done.
500 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords");	543 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords");

501 // Break the terms down into individual terms (words), get the candidate	544 // Break the terms down into individual terms (words), get the candidate

502 // set for each term, and intersect each to get a final candidate list.	545 // set for each term, and intersect each to get a final candidate list.

503 // Note that a single 'term' from the user's perspective might be	546 // Note that a single 'term' from the user's perspective might be

504 // a string like "http://www.somewebsite.com" which, from our perspective,	547 // a string like "http://www.somewebsite.com" which, from our perspective,

505 // is four words: 'http', 'www', 'somewebsite', and 'com'.	548 // is four words: 'http', 'www', 'somewebsite', and 'com'.

506 HistoryIDSet history_id_set;

507 String16Vector words(unsorted_words);	549 String16Vector words(unsorted_words);

508 // Sort the words into the longest first as such are likely to narrow down	550 // Sort the words into the longest first as such are likely to narrow down

509 // the results quicker. Also, single character words are the most expensive	551 // the results quicker. Also, single character words are the most expensive

510 // to process so save them for last.	552 // to process so save them for last.

511 std::sort(words.begin(), words.end(), LengthGreater);	553 std::sort(words.begin(), words.end(), LengthGreater);

512 for (String16Vector::iterator iter = words.begin(); iter != words.end();	554

513 ++iter) {	555 return IntersectSetsWithProjection<HistoryIDVector>(

514 base::string16 uni_word = *iter;	556 words.begin(), words.end(),

515 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);	557 [this](const base::string16& word) { return HistoryIDsForTerm(word); });

516 if (term_history_set.empty()) {	558 }

517 history_id_set.clear();	559

518 break;	560 void URLIndexPrivateData::TrimHistoryIdsPool(

519 }	561 HistoryIDVector* history_ids) const {

520 if (iter == words.begin()) {	562 constexpr size_t kItemsToScoreLimit = 500;

521 history_id_set.swap(term_history_set);	563 if (history_ids->size() < kItemsToScoreLimit)

522 } else {	564 return;

523 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(	565

524 history_id_set, term_history_set);	566 // Trim down the set by sorting by typed-count, visit-count, and last

525 history_id_set.swap(new_history_id_set);	567 // visit.

526 }	568 auto new_end = history_ids->begin() + kItemsToScoreLimit;

527 }	569 HistoryItemFactorGreater item_factor_functor(history_info_map_);

528 return history_id_set;	570

	571 std::nth_element(history_ids->begin(), new_end, history_ids->end(),

	572 item_factor_functor);

	573 history_ids->erase(new_end, history_ids->end());

529 }	574 }

530	575

531 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(	576 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(

532 const base::string16& term) {	577 const base::string16& term) {

533 if (term.empty())	578 if (term.empty())

534 return HistoryIDSet();	579 return HistoryIDSet();

535	580

536 // TODO(mrossetti): Consider optimizing for very common terms such as	581 // TODO(mrossetti): Consider optimizing for very common terms such as

537 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently	582 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently

538 // occuring words in the user's searches.	583 // occuring words in the user's searches.

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
585	630

586 // Reduce the word set with any leftover, unprocessed characters.	631 // Reduce the word set with any leftover, unprocessed characters.

587 if (!unique_chars.empty()) {	632 if (!unique_chars.empty()) {

588 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));	633 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));

589 // We might come up empty on the leftovers.	634 // We might come up empty on the leftovers.

590 if (leftover_set.empty()) {	635 if (leftover_set.empty()) {

591 search_term_cache_[term] = SearchTermCacheItem();	636 search_term_cache_[term] = SearchTermCacheItem();

592 return HistoryIDSet();	637 return HistoryIDSet();

593 }	638 }

594 // Or there may not have been a prefix from which to start.	639 // Or there may not have been a prefix from which to start.

595 if (prefix_chars.empty()) {	640 word_id_set = prefix_chars.empty() ? std::move(leftover_set)

596 word_id_set.swap(leftover_set);	641 : base::STLSetIntersection<WordIDSet>(

597 } else {	642 word_id_set, leftover_set);

598 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(

599 word_id_set, leftover_set);

600 word_id_set.swap(new_word_id_set);

601 }

602 }	643 }

603	644

604 // We must filter the word list because the resulting word set surely	645 // We must filter the word list because the resulting word set surely

605 // contains words which do not have the search term as a proper subset.	646 // contains words which do not have the search term as a proper subset.

606 for (WordIDSet::iterator word_set_iter = word_id_set.begin();	647 for (WordIDSet::iterator word_set_iter = word_id_set.begin();

607 word_set_iter != word_id_set.end(); ) {	648 word_set_iter != word_id_set.end(); ) {

608 if (word_list_[*word_set_iter].find(term) == base::string16::npos)	649 if (word_list_[*word_set_iter].find(term) == base::string16::npos)

609 word_set_iter = word_id_set.erase(word_set_iter);	650 word_set_iter = word_id_set.erase(word_set_iter);

610 else	651 else

611 ++word_set_iter;	652 ++word_set_iter;

612 }	653 }

613 } else {	654 } else {

614 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));	655 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));

615 }	656 }

616	657

617 // If any words resulted then we can compose a set of history IDs by unioning	658 // If any words resulted then we can compose a set of history IDs by unioning

618 // the sets from each word.	659 // the sets from each word.

619 HistoryIDSet history_id_set;	660 HistoryIDSet history_id_set;

620 if (!word_id_set.empty()) {	661 for (WordID word_id : word_id_set) {

621 for (WordIDSet::iterator word_id_iter = word_id_set.begin();	662 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);

622 word_id_iter != word_id_set.end(); ++word_id_iter) {	663 if (word_iter != word_id_history_map_.end()) {

623 WordID word_id = *word_id_iter;	664 HistoryIDSet& word_history_id_set(word_iter->second);

624 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);	665 history_id_set.insert(word_history_id_set.begin(),

625 if (word_iter != word_id_history_map_.end()) {	666 word_history_id_set.end());

626 HistoryIDSet& word_history_id_set(word_iter->second);

627 history_id_set.insert(word_history_id_set.begin(),

628 word_history_id_set.end());

629 }

630 }	667 }

631 }	668 }

632	669

633 // Record a new cache entry for this word if the term is longer than	670 // Record a new cache entry for this word if the term is longer than

634 // a single character.	671 // a single character.

635 if (term_length > 1)	672 if (term_length > 1)

636 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);	673 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);

637	674

638 return history_id_set;	675 return history_id_set;

639 }	676 }

640	677

641 WordIDSet URLIndexPrivateData::WordIDSetForTermChars(	678 WordIDSet URLIndexPrivateData::WordIDSetForTermChars(

642 const Char16Set& term_chars) {	679 const Char16Set& term_chars) {

643 WordIDSet word_id_set;	680 WordIDSet empty_set;

644 for (Char16Set::const_iterator c_iter = term_chars.begin();	681 auto GetSet = [&](base::char16 c) -> const WordIDSet& {
	Peter Kasting 2017/02/25 04:54:42 Nit: I would explicitly capture [this, &empty_set] Nit: I would explicitly capture [this, &empty_set]. That said; can you return the set by value and not const ref, and avoid declaring \|empty_set\| at all (just return WordIDSet() in the lambda)? dyaroshev 2017/02/26 01:12:17 Not applicable. Returning by value would add an e Show quoted text On 2017/02/25 04:54:42, Peter Kasting wrote: > Nit: I would explicitly capture [this, &empty_set]. > > That said; can you return the set by value and not const ref, and avoid > declaring \|empty_set\| at all (just return WordIDSet() in the lambda)? Not applicable. Returning by value would add an extra copy which can be expensive. Peter Kasting 2017/02/26 01:35:18 True. But is it expensive enough to matter? One Show quoted text On 2017/02/26 01:12:17, dyaroshev wrote: > On 2017/02/25 04:54:42, Peter Kasting wrote: > > Nit: I would explicitly capture [this, &empty_set]. > > > > That said; can you return the set by value and not const ref, and avoid > > declaring \|empty_set\| at all (just return WordIDSet() in the lambda)? > > Not applicable. > > Returning by value would add an extra copy which can be expensive. True. But is it expensive enough to matter? One of the themes of my review comments should be taken to be "use simpler and more readable forms when the performance difference has little practical effect". I honestly don't know the answer here, that's just my general guidance for the future. dyaroshev 2017/02/26 01:39:16 Well, for std::set I've seen cases where it does. Show quoted text On 2017/02/26 01:35:18, Peter Kasting wrote: > On 2017/02/26 01:12:17, dyaroshev wrote: > > On 2017/02/25 04:54:42, Peter Kasting wrote: > > > Nit: I would explicitly capture [this, &empty_set]. > > > > > > That said; can you return the set by value and not const ref, and avoid > > > declaring \|empty_set\| at all (just return WordIDSet() in the lambda)? > > > > Not applicable. > > > > Returning by value would add an extra copy which can be expensive. > > True. But is it expensive enough to matter? > > One of the themes of my review comments should be taken to be "use simpler and > more readable forms when the performance difference has little practical > effect". I honestly don't know the answer here, that's just my general guidance > for the future. Well, for std::set I've seen cases where it does. I agree with the general guidance.
645 c_iter != term_chars.end(); ++c_iter) {	682 auto char_iter = char_word_map_.find(c);

646 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);	683 if (char_iter == char_word_map_.end())

647 if (char_iter == char_word_map_.end()) {	684 return empty_set;

648 // A character was not found so there are no matching results: bail.	685 return char_iter->second;
	Peter Kasting 2017/02/25 04:54:41 Nit: Simpler: return char_iter == char_word_m Nit: Simpler: return char_iter == char_word_map_.end() ? empty_set : char_iter->second; dyaroshev 2017/02/26 01:12:16 Not applicable. Show quoted text On 2017/02/25 04:54:41, Peter Kasting wrote: > Nit: Simpler: > > return char_iter == char_word_map_.end() ? empty_set : char_iter->second; Not applicable.
649 word_id_set.clear();	686 };

650 break;	687 return IntersectSetsWithProjection<WordIDSet>(term_chars.begin(),

651 }	688 term_chars.end(), GetSet);

652 WordIDSet& char_word_id_set(char_iter->second);

653 // It is possible for there to no longer be any words associated with

654 // a particular character. Give up in that case.

655 if (char_word_id_set.empty()) {

656 word_id_set.clear();

657 break;

658 }

659

660 if (c_iter == term_chars.begin()) {

661 // First character results becomes base set of results.

662 word_id_set = char_word_id_set;

663 } else {

664 // Subsequent character results get intersected in.

665 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(

666 word_id_set, char_word_id_set);

667 word_id_set.swap(new_word_id_set);

668 }

669 }

670 return word_id_set;

671 }	689 }

672	690

673 void URLIndexPrivateData::HistoryIdSetToScoredMatches(	691 void URLIndexPrivateData::HistoryIdsToScoredMatches(

674 HistoryIDSet history_id_set,	692 HistoryIDVector history_ids,

675 const base::string16& lower_raw_string,	693 const base::string16& lower_raw_string,

676 const TemplateURLService* template_url_service,	694 const TemplateURLService* template_url_service,

677 bookmarks::BookmarkModel* bookmark_model,	695 bookmarks::BookmarkModel* bookmark_model,

678 ScoredHistoryMatches* scored_items) const {	696 ScoredHistoryMatches* scored_items) const {

679 if (history_id_set.empty())	697 if (history_ids.empty())

680 return;	698 return;

681	699

682 // Break up the raw search string (complete with escaped URL elements) into	700 // Break up the raw search string (complete with escaped URL elements) into

683 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()).	701 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()).

684 // We only want to break up the search string on 'true' whitespace rather than	702 // We only want to break up the search string on 'true' whitespace rather than

685 // escaped whitespace. For example, when the user types	703 // escaped whitespace. For example, when the user types

686 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and	704 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and

687 // "release".	705 // "release".

688 String16Vector lower_raw_terms =	706 String16Vector lower_raw_terms =

689 base::SplitString(lower_raw_string, base::kWhitespaceUTF16,	707 base::SplitString(lower_raw_string, base::kWhitespaceUTF16,

690 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);	708 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

691	709

692 // Don't score matches when there are no terms to score against. (It's	710 // Don't score matches when there are no terms to score against. (It's

693 // possible that the word break iterater that extracts words to search for in	711 // possible that the word break iterater that extracts words to search for in

694 // the database allows some whitespace "words" whereas SplitString excludes a	712 // the database allows some whitespace "words" whereas SplitString excludes a

695 // long list of whitespace.) One could write a scoring function that gives a	713 // long list of whitespace.) One could write a scoring function that gives a

696 // reasonable order to matches when there are no terms (i.e., all the words	714 // reasonable order to matches when there are no terms (i.e., all the words

697 // are some form of whitespace), but this is such a rare edge case that it's	715 // are some form of whitespace), but this is such a rare edge case that it's

698 // not worth the time.	716 // not worth the time.

699 if (lower_raw_terms.empty())	717 if (lower_raw_terms.empty())

700 return;	718 return;

701	719

702 WordStarts lower_terms_to_word_starts_offsets;	720 WordStarts lower_terms_to_word_starts_offsets;

703 CalculateWordStartsOffsets(lower_raw_terms,	721 CalculateWordStartsOffsets(lower_raw_terms,

704 &lower_terms_to_word_starts_offsets);	722 &lower_terms_to_word_starts_offsets);

705	723

706 // Filter bad matches and other matches we don't want to display.	724 // Filter bad matches and other matches we don't want to display.

707 for (auto it = history_id_set.begin();;) {	725 {
	Peter Kasting 2017/02/25 04:54:42 Nit: {} not necessary Nit: {} not necessary dyaroshev 2017/02/26 01:12:16 Done. Show quoted text On 2017/02/25 04:54:42, Peter Kasting wrote: > Nit: {} not necessary Done.
708 it = std::find_if(it, history_id_set.end(),	726 auto filter = [this, template_url_service](const HistoryID history_id) {

709 [this, template_url_service](const HistoryID history_id) {	727 return ShouldFilter(history_id, template_url_service);

710 return ShouldFilter(history_id, template_url_service);	728 };

711 });	729 history_ids.erase(

712 if (it == history_id_set.end())	730 std::remove_if(history_ids.begin(), history_ids.end(), filter),

713 break;	731 history_ids.end());

714 it = history_id_set.erase(it);

715 }	732 }

716	733

717 // Score the matches.	734 // Score the matches.

718 const size_t num_matches = history_id_set.size();	735 const size_t num_matches = history_ids.size();

719 const base::Time now = base::Time::Now();	736 const base::Time now = base::Time::Now();

720 std::transform(	737 std::transform(

721 history_id_set.begin(), history_id_set.end(),	738 history_ids.begin(), history_ids.end(), std::back_inserter(*scored_items),

722 std::back_inserter(*scored_items), [&](const HistoryID history_id) {	739 [&](const HistoryID history_id) {

723 auto hist_pos = history_info_map_.find(history_id);	740 auto hist_pos = history_info_map_.find(history_id);

724 const history::URLRow& hist_item = hist_pos->second.url_row;	741 const history::URLRow& hist_item = hist_pos->second.url_row;

725 auto starts_pos = word_starts_map_.find(history_id);	742 auto starts_pos = word_starts_map_.find(history_id);

726 DCHECK(starts_pos != word_starts_map_.end());	743 DCHECK(starts_pos != word_starts_map_.end());

727 return ScoredHistoryMatch(	744 return ScoredHistoryMatch(

728 hist_item, hist_pos->second.visits, lower_raw_string,	745 hist_item, hist_pos->second.visits, lower_raw_string,

729 lower_raw_terms, lower_terms_to_word_starts_offsets,	746 lower_raw_terms, lower_terms_to_word_starts_offsets,

730 starts_pos->second,	747 starts_pos->second,

731 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()),	748 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()),

732 num_matches, now);	749 num_matches, now);

(...skipping 90 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
823 HistoryID history_id = static_cast<HistoryID>(row.id());	840 HistoryID history_id = static_cast<HistoryID>(row.id());

824 // Split URL into individual, unique words then add in the title words.	841 // Split URL into individual, unique words then add in the title words.

825 const GURL& gurl(row.url());	842 const GURL& gurl(row.url());

826 const base::string16& url =	843 const base::string16& url =

827 bookmarks::CleanUpUrlForMatching(gurl, nullptr);	844 bookmarks::CleanUpUrlForMatching(gurl, nullptr);

828 String16Set url_words = String16SetFromString16(url,	845 String16Set url_words = String16SetFromString16(url,

829 word_starts ? &word_starts->url_word_starts_ : nullptr);	846 word_starts ? &word_starts->url_word_starts_ : nullptr);

830 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());	847 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());

831 String16Set title_words = String16SetFromString16(title,	848 String16Set title_words = String16SetFromString16(title,

832 word_starts ? &word_starts->title_word_starts_ : nullptr);	849 word_starts ? &word_starts->title_word_starts_ : nullptr);

833 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words);	850 for (const auto& word :

834 for (String16Set::iterator word_iter = words.begin();	851 base::STLSetUnion<String16Set>(url_words, title_words))

835 word_iter != words.end(); ++word_iter)	852 AddWordToIndex(word, history_id);

836 AddWordToIndex(*word_iter, history_id);

837	853

838 search_term_cache_.clear(); // Invalidate the term cache.	854 search_term_cache_.clear(); // Invalidate the term cache.

839 }	855 }

840	856

841 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,	857 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,

842 HistoryID history_id) {	858 HistoryID history_id) {

843 WordMap::iterator word_pos = word_map_.find(term);	859 WordMap::iterator word_pos;

844 if (word_pos != word_map_.end())	860 bool is_new;

845 UpdateWordHistory(word_pos->second, history_id);	861 std::tie(word_pos, is_new) = word_map_.emplace(term, WordID());

846 else	862

847 AddWordHistory(term, history_id);	863 // Adding a new word (i.e. a word that is not already in the word index).

	864 if (is_new) {

	865 word_pos->second = AddNewWordToWordList(term);

	866

	867 // For each character in the newly added word add the word to the character

	868 // index.

	869 for (base::char16 uni_char : Char16SetFromString16(term))

	870 char_word_map_[uni_char].insert(word_pos->second);

	871 }

	872

	873 word_id_history_map_[word_pos->second].insert(history_id);

	874 history_id_word_map_[history_id].insert(word_pos->second);

848 }	875 }

849	876

850 void URLIndexPrivateData::AddWordHistory(const base::string16& term,	877 WordID URLIndexPrivateData::AddNewWordToWordList(const base::string16& term) {

851 HistoryID history_id) {

852 WordID word_id = word_list_.size();	878 WordID word_id = word_list_.size();

853 if (available_words_.empty()) {	879 if (available_words_.empty()) {

854 word_list_.push_back(term);	880 word_list_.push_back(term);

855 } else {	881 return word_id;

856 word_id = *(available_words_.begin());

857 word_list_[word_id] = term;

858 available_words_.erase(word_id);

859 }	882 }

860 word_map_[term] = word_id;

861	883

862 HistoryIDSet history_id_set;	884 word_id = available_words_.top();

863 history_id_set.insert(history_id);	885 available_words_.pop();

864 word_id_history_map_[word_id] = history_id_set;	886 word_list_[word_id] = term;

865 AddToHistoryIDWordMap(history_id, word_id);	887 return word_id;

866

867 // For each character in the newly added word (i.e. a word that is not

868 // already in the word index), add the word to the character index.

869 Char16Set characters = Char16SetFromString16(term);

870 for (Char16Set::iterator uni_char_iter = characters.begin();

871 uni_char_iter != characters.end(); ++uni_char_iter) {

872 base::char16 uni_char = *uni_char_iter;

873 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);

874 if (char_iter != char_word_map_.end()) {

875 // Update existing entry in the char/word index.

876 WordIDSet& word_id_set(char_iter->second);

877 word_id_set.insert(word_id);

878 } else {

879 // Create a new entry in the char/word index.

880 WordIDSet word_id_set;

881 word_id_set.insert(word_id);

882 char_word_map_[uni_char] = word_id_set;

883 }

884 }

885 }

886

887 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,

888 HistoryID history_id) {

889 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);

890 DCHECK(history_pos != word_id_history_map_.end());

891 HistoryIDSet& history_id_set(history_pos->second);

892 history_id_set.insert(history_id);

893 AddToHistoryIDWordMap(history_id, word_id);

894 }

895

896 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,

897 WordID word_id) {

898 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);

899 if (iter != history_id_word_map_.end()) {

900 WordIDSet& word_id_set(iter->second);

901 word_id_set.insert(word_id);

902 } else {

903 WordIDSet word_id_set;

904 word_id_set.insert(word_id);

905 history_id_word_map_[history_id] = word_id_set;

906 }

907 }	888 }

908	889

909 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {	890 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {

910 RemoveRowWordsFromIndex(row);	891 RemoveRowWordsFromIndex(row);

911 HistoryID history_id = static_cast<HistoryID>(row.id());	892 HistoryID history_id = static_cast<HistoryID>(row.id());

912 history_info_map_.erase(history_id);	893 history_info_map_.erase(history_id);

913 word_starts_map_.erase(history_id);	894 word_starts_map_.erase(history_id);

914 }	895 }

915	896

916 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {	897 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {

917 // Remove the entries in history_id_word_map_ and word_id_history_map_ for	898 // Remove the entries in history_id_word_map_ and word_id_history_map_ for

918 // this row.	899 // this row.

919 HistoryID history_id = static_cast<HistoryID>(row.id());	900 HistoryID history_id = static_cast<HistoryID>(row.id());

920 WordIDSet word_id_set = history_id_word_map_[history_id];	901 WordIDSet word_id_set = history_id_word_map_[history_id];

921 history_id_word_map_.erase(history_id);	902 history_id_word_map_.erase(history_id);

922	903

923 // Reconcile any changes to word usage.	904 // Reconcile any changes to word usage.

924 for (WordIDSet::iterator word_id_iter = word_id_set.begin();	905 for (WordID word_id : word_id_set) {

925 word_id_iter != word_id_set.end(); ++word_id_iter) {	906 auto word_id_history_map_iter = word_id_history_map_.find(word_id);

926 WordID word_id = *word_id_iter;	907 DCHECK(word_id_history_map_iter != word_id_history_map_.end());

927 word_id_history_map_[word_id].erase(history_id);	908

928 if (!word_id_history_map_[word_id].empty())	909 word_id_history_map_iter->second.erase(history_id);

929 continue; // The word is still in use.	910 if (!word_id_history_map_iter->second.empty())

	911 continue;

930	912

931 // The word is no longer in use. Reconcile any changes to character usage.	913 // The word is no longer in use. Reconcile any changes to character usage.

932 base::string16 word = word_list_[word_id];	914 base::string16 word = word_list_[word_id];

933 Char16Set characters = Char16SetFromString16(word);	915 for (base::char16 uni_char : Char16SetFromString16(word)) {

934 for (Char16Set::iterator uni_char_iter = characters.begin();	916 auto char_word_map_iter = char_word_map_.find(uni_char);

935 uni_char_iter != characters.end(); ++uni_char_iter) {	917 char_word_map_iter->second.erase(word_id);

936 base::char16 uni_char = *uni_char_iter;	918 if (char_word_map_iter->second.empty())

937 char_word_map_[uni_char].erase(word_id);	919 char_word_map_.erase(char_word_map_iter);

938 if (char_word_map_[uni_char].empty())

939 char_word_map_.erase(uni_char); // No longer in use.

940 }	920 }

941	921

942 // Complete the removal of references to the word.	922 // Complete the removal of references to the word.

943 word_id_history_map_.erase(word_id);	923 word_id_history_map_.erase(word_id_history_map_iter);

944 word_map_.erase(word);	924 word_map_.erase(word);

945 word_list_[word_id] = base::string16();	925 word_list_[word_id] = base::string16();

946 available_words_.insert(word_id);	926 available_words_.push(word_id);

947 }	927 }

948 }	928 }

949	929

950 void URLIndexPrivateData::ResetSearchTermCache() {	930 void URLIndexPrivateData::ResetSearchTermCache() {

951 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();	931 for (auto& item : search_term_cache_)

952 iter != search_term_cache_.end(); ++iter)	932 item.second.used_ = false;

953 iter->second.used_ = false;

954 }	933 }

955	934

956 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {	935 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {

957 base::TimeTicks beginning_time = base::TimeTicks::Now();	936 base::TimeTicks beginning_time = base::TimeTicks::Now();

958 InMemoryURLIndexCacheItem index_cache;	937 InMemoryURLIndexCacheItem index_cache;

959 SavePrivateData(&index_cache);	938 SavePrivateData(&index_cache);

960 std::string data;	939 std::string data;

961 if (!index_cache.SerializeToString(&data)) {	940 if (!index_cache.SerializeToString(&data)) {

962 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";	941 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";

963 return false;	942 return false;

(...skipping 24 matching lines...) Expand all Loading...
988 SaveWordIDHistoryMap(cache);	967 SaveWordIDHistoryMap(cache);

989 SaveHistoryInfoMap(cache);	968 SaveHistoryInfoMap(cache);

990 SaveWordStartsMap(cache);	969 SaveWordStartsMap(cache);

991 }	970 }

992	971

993 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {	972 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {

994 if (word_list_.empty())	973 if (word_list_.empty())

995 return;	974 return;

996 WordListItem* list_item = cache->mutable_word_list();	975 WordListItem* list_item = cache->mutable_word_list();

997 list_item->set_word_count(word_list_.size());	976 list_item->set_word_count(word_list_.size());

998 for (String16Vector::const_iterator iter = word_list_.begin();	977 for (const base::string16& word : word_list_)

999 iter != word_list_.end(); ++iter)	978 list_item->add_word(base::UTF16ToUTF8(word));

1000 list_item->add_word(base::UTF16ToUTF8(*iter));

1001 }	979 }

1002	980

1003 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {	981 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {

1004 if (word_map_.empty())	982 if (word_map_.empty())

1005 return;	983 return;

1006 WordMapItem* map_item = cache->mutable_word_map();	984 WordMapItem* map_item = cache->mutable_word_map();

1007 map_item->set_item_count(word_map_.size());	985 map_item->set_item_count(word_map_.size());

1008 for (WordMap::const_iterator iter = word_map_.begin();	986 for (const auto& elem : word_map_) {

1009 iter != word_map_.end(); ++iter) {

1010 WordMapEntry* map_entry = map_item->add_word_map_entry();	987 WordMapEntry* map_entry = map_item->add_word_map_entry();

1011 map_entry->set_word(base::UTF16ToUTF8(iter->first));	988 map_entry->set_word(base::UTF16ToUTF8(elem.first));

1012 map_entry->set_word_id(iter->second);	989 map_entry->set_word_id(elem.second);

1013 }	990 }

1014 }	991 }

1015	992

1016 void URLIndexPrivateData::SaveCharWordMap(	993 void URLIndexPrivateData::SaveCharWordMap(

1017 InMemoryURLIndexCacheItem* cache) const {	994 InMemoryURLIndexCacheItem* cache) const {

1018 if (char_word_map_.empty())	995 if (char_word_map_.empty())

1019 return;	996 return;

1020 CharWordMapItem* map_item = cache->mutable_char_word_map();	997 CharWordMapItem* map_item = cache->mutable_char_word_map();

1021 map_item->set_item_count(char_word_map_.size());	998 map_item->set_item_count(char_word_map_.size());

1022 for (CharWordIDMap::const_iterator iter = char_word_map_.begin();	999 for (const auto& entry : char_word_map_) {

1023 iter != char_word_map_.end(); ++iter) {

1024 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();	1000 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();

1025 map_entry->set_char_16(iter->first);	1001 map_entry->set_char_16(entry.first);

1026 const WordIDSet& word_id_set(iter->second);	1002 const WordIDSet& word_id_set(entry.second);

1027 map_entry->set_item_count(word_id_set.size());	1003 map_entry->set_item_count(word_id_set.size());

1028 for (WordIDSet::const_iterator set_iter = word_id_set.begin();	1004 for (WordID word_id : word_id_set)

1029 set_iter != word_id_set.end(); ++set_iter)	1005 map_entry->add_word_id(word_id);

1030 map_entry->add_word_id(*set_iter);

1031 }	1006 }

1032 }	1007 }

1033	1008

1034 void URLIndexPrivateData::SaveWordIDHistoryMap(	1009 void URLIndexPrivateData::SaveWordIDHistoryMap(

1035 InMemoryURLIndexCacheItem* cache) const {	1010 InMemoryURLIndexCacheItem* cache) const {

1036 if (word_id_history_map_.empty())	1011 if (word_id_history_map_.empty())

1037 return;	1012 return;

1038 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();	1013 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();

1039 map_item->set_item_count(word_id_history_map_.size());	1014 map_item->set_item_count(word_id_history_map_.size());

1040 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();	1015 for (const auto& entry : word_id_history_map_) {

1041 iter != word_id_history_map_.end(); ++iter) {

1042 WordIDHistoryMapEntry* map_entry =	1016 WordIDHistoryMapEntry* map_entry =

1043 map_item->add_word_id_history_map_entry();	1017 map_item->add_word_id_history_map_entry();

1044 map_entry->set_word_id(iter->first);	1018 map_entry->set_word_id(entry.first);

1045 const HistoryIDSet& history_id_set(iter->second);	1019 const HistoryIDSet& history_id_set(entry.second);

1046 map_entry->set_item_count(history_id_set.size());	1020 map_entry->set_item_count(history_id_set.size());

1047 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();	1021 for (HistoryID history_id : history_id_set)

1048 set_iter != history_id_set.end(); ++set_iter)	1022 map_entry->add_history_id(history_id);

1049 map_entry->add_history_id(*set_iter);

1050 }	1023 }

1051 }	1024 }

1052	1025

1053 void URLIndexPrivateData::SaveHistoryInfoMap(	1026 void URLIndexPrivateData::SaveHistoryInfoMap(

1054 InMemoryURLIndexCacheItem* cache) const {	1027 InMemoryURLIndexCacheItem* cache) const {

1055 if (history_info_map_.empty())	1028 if (history_info_map_.empty())

1056 return;	1029 return;

1057 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();	1030 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

1058 map_item->set_item_count(history_info_map_.size());	1031 map_item->set_item_count(history_info_map_.size());

1059 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();	1032 for (const auto& entry : history_info_map_) {

1060 iter != history_info_map_.end(); ++iter) {

1061 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();	1033 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

1062 map_entry->set_history_id(iter->first);	1034 map_entry->set_history_id(entry.first);

1063 const history::URLRow& url_row(iter->second.url_row);	1035 const history::URLRow& url_row(entry.second.url_row);

1064 // Note: We only save information that contributes to the index so there	1036 // Note: We only save information that contributes to the index so there

1065 // is no need to save search_term_cache_ (not persistent).	1037 // is no need to save search_term_cache_ (not persistent).

1066 map_entry->set_visit_count(url_row.visit_count());	1038 map_entry->set_visit_count(url_row.visit_count());

1067 map_entry->set_typed_count(url_row.typed_count());	1039 map_entry->set_typed_count(url_row.typed_count());

1068 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());	1040 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

1069 map_entry->set_url(url_row.url().spec());	1041 map_entry->set_url(url_row.url().spec());

1070 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));	1042 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));

1071 const VisitInfoVector& visits(iter->second.visits);	1043 for (const auto& visit : entry.second.visits) {

1072 for (VisitInfoVector::const_iterator visit_iter = visits.begin();

1073 visit_iter != visits.end(); ++visit_iter) {

1074 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();	1044 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();

1075 visit_info->set_visit_time(visit_iter->first.ToInternalValue());	1045 visit_info->set_visit_time(visit.first.ToInternalValue());

1076 visit_info->set_transition_type(visit_iter->second);	1046 visit_info->set_transition_type(visit.second);

1077 }	1047 }

1078 }	1048 }

1079 }	1049 }

1080	1050

1081 void URLIndexPrivateData::SaveWordStartsMap(	1051 void URLIndexPrivateData::SaveWordStartsMap(

1082 InMemoryURLIndexCacheItem* cache) const {	1052 InMemoryURLIndexCacheItem* cache) const {

1083 if (word_starts_map_.empty())	1053 if (word_starts_map_.empty())

1084 return;	1054 return;

1085 // For unit testing: Enable saving of the cache as an earlier version to	1055 // For unit testing: Enable saving of the cache as an earlier version to

1086 // allow testing of cache file upgrading in ReadFromFile().	1056 // allow testing of cache file upgrading in ReadFromFile().

1087 // TODO(mrossetti): Instead of intruding on production code with this kind of	1057 // TODO(mrossetti): Instead of intruding on production code with this kind of

1088 // test harness, save a copy of an older version cache with known results.	1058 // test harness, save a copy of an older version cache with known results.

1089 // Implement this when switching the caching over to SQLite.	1059 // Implement this when switching the caching over to SQLite.

1090 if (saved_cache_version_ < 1)	1060 if (saved_cache_version_ < 1)

1091 return;	1061 return;

1092	1062

1093 WordStartsMapItem* map_item = cache->mutable_word_starts_map();	1063 WordStartsMapItem* map_item = cache->mutable_word_starts_map();

1094 map_item->set_item_count(word_starts_map_.size());	1064 map_item->set_item_count(word_starts_map_.size());

1095 for (WordStartsMap::const_iterator iter = word_starts_map_.begin();	1065 for (const auto& entry : word_starts_map_) {

1096 iter != word_starts_map_.end(); ++iter) {

1097 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();	1066 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();

1098 map_entry->set_history_id(iter->first);	1067 map_entry->set_history_id(entry.first);

1099 const RowWordStarts& word_starts(iter->second);	1068 const RowWordStarts& word_starts(entry.second);

1100 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();	1069 for (auto url_word_start : word_starts.url_word_starts_)

1101 i != word_starts.url_word_starts_.end(); ++i)	1070 map_entry->add_url_word_starts(url_word_start);

1102 map_entry->add_url_word_starts(*i);	1071 for (auto title_word_start : word_starts.title_word_starts_)

1103 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();	1072 map_entry->add_title_word_starts(title_word_start);

1104 i != word_starts.title_word_starts_.end(); ++i)

1105 map_entry->add_title_word_starts(*i);

1106 }	1073 }

1107 }	1074 }

1108	1075

1109 bool URLIndexPrivateData::RestorePrivateData(	1076 bool URLIndexPrivateData::RestorePrivateData(

1110 const InMemoryURLIndexCacheItem& cache) {	1077 const InMemoryURLIndexCacheItem& cache) {

1111 last_time_rebuilt_from_history_ =	1078 last_time_rebuilt_from_history_ =

1112 base::Time::FromInternalValue(cache.last_rebuild_timestamp());	1079 base::Time::FromInternalValue(cache.last_rebuild_timestamp());

1113 const base::TimeDelta rebuilt_ago =	1080 const base::TimeDelta rebuilt_ago =

1114 base::Time::Now() - last_time_rebuilt_from_history_;	1081 base::Time::Now() - last_time_rebuilt_from_history_;

1115 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) \|\|	1082 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) \|\|

(...skipping 22 matching lines...) Expand all Loading...
1138 bool URLIndexPrivateData::RestoreWordList(	1105 bool URLIndexPrivateData::RestoreWordList(

1139 const InMemoryURLIndexCacheItem& cache) {	1106 const InMemoryURLIndexCacheItem& cache) {

1140 if (!cache.has_word_list())	1107 if (!cache.has_word_list())

1141 return false;	1108 return false;

1142 const WordListItem& list_item(cache.word_list());	1109 const WordListItem& list_item(cache.word_list());

1143 uint32_t expected_item_count = list_item.word_count();	1110 uint32_t expected_item_count = list_item.word_count();

1144 uint32_t actual_item_count = list_item.word_size();	1111 uint32_t actual_item_count = list_item.word_size();

1145 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1112 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1146 return false;	1113 return false;

1147 const RepeatedPtrField<std::string>& words(list_item.word());	1114 const RepeatedPtrField<std::string>& words(list_item.word());

1148 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();	1115 word_list_.reserve(words.size());

1149 iter != words.end(); ++iter)	1116 std::transform(

1150 word_list_.push_back(base::UTF8ToUTF16(*iter));	1117 words.begin(), words.end(), std::back_inserter(word_list_),

	1118 [](const std::string& word) { return base::UTF8ToUTF16(word); });

1151 return true;	1119 return true;

1152 }	1120 }

1153	1121

1154 bool URLIndexPrivateData::RestoreWordMap(	1122 bool URLIndexPrivateData::RestoreWordMap(

1155 const InMemoryURLIndexCacheItem& cache) {	1123 const InMemoryURLIndexCacheItem& cache) {

1156 if (!cache.has_word_map())	1124 if (!cache.has_word_map())

1157 return false;	1125 return false;

1158 const WordMapItem& list_item(cache.word_map());	1126 const WordMapItem& list_item(cache.word_map());

1159 uint32_t expected_item_count = list_item.item_count();	1127 uint32_t expected_item_count = list_item.item_count();

1160 uint32_t actual_item_count = list_item.word_map_entry_size();	1128 uint32_t actual_item_count = list_item.word_map_entry_size();

1161 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1129 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1162 return false;	1130 return false;

1163 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());	1131 for (const auto& entry : list_item.word_map_entry())

1164 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();	1132 word_map_[base::UTF8ToUTF16(entry.word())] = entry.word_id();

1165 iter != entries.end(); ++iter)	1133

1166 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id();

1167 return true;	1134 return true;

1168 }	1135 }

1169	1136

1170 bool URLIndexPrivateData::RestoreCharWordMap(	1137 bool URLIndexPrivateData::RestoreCharWordMap(

1171 const InMemoryURLIndexCacheItem& cache) {	1138 const InMemoryURLIndexCacheItem& cache) {

1172 if (!cache.has_char_word_map())	1139 if (!cache.has_char_word_map())

1173 return false;	1140 return false;

1174 const CharWordMapItem& list_item(cache.char_word_map());	1141 const CharWordMapItem& list_item(cache.char_word_map());

1175 uint32_t expected_item_count = list_item.item_count();	1142 uint32_t expected_item_count = list_item.item_count();

1176 uint32_t actual_item_count = list_item.char_word_map_entry_size();	1143 uint32_t actual_item_count = list_item.char_word_map_entry_size();

1177 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1144 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1178 return false;	1145 return false;

1179 const RepeatedPtrField<CharWordMapEntry>&	1146

1180 entries(list_item.char_word_map_entry());	1147 for (const auto& entry : list_item.char_word_map_entry()) {

1181 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =	1148 expected_item_count = entry.item_count();

1182 entries.begin(); iter != entries.end(); ++iter) {	1149 actual_item_count = entry.word_id_size();

1183 expected_item_count = iter->item_count();

1184 actual_item_count = iter->word_id_size();

1185 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1150 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1186 return false;	1151 return false;

1187 base::char16 uni_char = static_cast<base::char16>(iter->char_16());	1152 base::char16 uni_char = static_cast<base::char16>(entry.char_16());

1188 WordIDSet word_id_set;	1153 const RepeatedField<int32_t>& word_ids(entry.word_id());
	Peter Kasting 2017/02/25 04:54:41 Nit: Use = rather than () to init references like Nit: Use = rather than () to init references like this (multiple places) dyaroshev 2017/02/26 01:12:16 Done. Show quoted text On 2017/02/25 04:54:41, Peter Kasting wrote: > Nit: Use = rather than () to init references like this (multiple places) Done.
1189 const RepeatedField<int32_t>& word_ids(iter->word_id());	1154 char_word_map_[uni_char] = {word_ids.begin(), word_ids.end()};

1190 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin();

1191 jiter != word_ids.end(); ++jiter)

1192 word_id_set.insert(*jiter);

1193 char_word_map_[uni_char] = word_id_set;

1194 }	1155 }

1195 return true;	1156 return true;

1196 }	1157 }

1197	1158

1198 bool URLIndexPrivateData::RestoreWordIDHistoryMap(	1159 bool URLIndexPrivateData::RestoreWordIDHistoryMap(

1199 const InMemoryURLIndexCacheItem& cache) {	1160 const InMemoryURLIndexCacheItem& cache) {

1200 if (!cache.has_word_id_history_map())	1161 if (!cache.has_word_id_history_map())

1201 return false;	1162 return false;

1202 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());	1163 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());

1203 uint32_t expected_item_count = list_item.item_count();	1164 uint32_t expected_item_count = list_item.item_count();

1204 uint32_t actual_item_count = list_item.word_id_history_map_entry_size();	1165 uint32_t actual_item_count = list_item.word_id_history_map_entry_size();

1205 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1166 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1206 return false;	1167 return false;

1207 const RepeatedPtrField<WordIDHistoryMapEntry>&	1168 for (const auto& entry : list_item.word_id_history_map_entry()) {

1208 entries(list_item.word_id_history_map_entry());	1169 expected_item_count = entry.item_count();

1209 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =	1170 actual_item_count = entry.history_id_size();

1210 entries.begin(); iter != entries.end(); ++iter) {

1211 expected_item_count = iter->item_count();

1212 actual_item_count = iter->history_id_size();

1213 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1171 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1214 return false;	1172 return false;

1215 WordID word_id = iter->word_id();	1173 WordID word_id = entry.word_id();

1216 HistoryIDSet history_id_set;	1174 const RepeatedField<int64_t>& history_ids(entry.history_id());

1217 const RepeatedField<int64_t>& history_ids(iter->history_id());	1175 word_id_history_map_[word_id] = {history_ids.begin(), history_ids.end()};

1218 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin();	1176 for (HistoryID history_id : history_ids)

1219 jiter != history_ids.end(); ++jiter) {	1177 history_id_word_map_[history_id].insert(word_id);

1220 history_id_set.insert(*jiter);

1221 AddToHistoryIDWordMap(*jiter, word_id);

1222 }

1223 word_id_history_map_[word_id] = history_id_set;

1224 }	1178 }

1225 return true;	1179 return true;

1226 }	1180 }

1227	1181

1228 bool URLIndexPrivateData::RestoreHistoryInfoMap(	1182 bool URLIndexPrivateData::RestoreHistoryInfoMap(

1229 const InMemoryURLIndexCacheItem& cache) {	1183 const InMemoryURLIndexCacheItem& cache) {

1230 if (!cache.has_history_info_map())	1184 if (!cache.has_history_info_map())

1231 return false;	1185 return false;

1232 const HistoryInfoMapItem& list_item(cache.history_info_map());	1186 const HistoryInfoMapItem& list_item(cache.history_info_map());

1233 uint32_t expected_item_count = list_item.item_count();	1187 uint32_t expected_item_count = list_item.item_count();

1234 uint32_t actual_item_count = list_item.history_info_map_entry_size();	1188 uint32_t actual_item_count = list_item.history_info_map_entry_size();

1235 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1189 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1236 return false;	1190 return false;

1237 const RepeatedPtrField<HistoryInfoMapEntry>&	1191

1238 entries(list_item.history_info_map_entry());	1192 for (const auto& entry : list_item.history_info_map_entry()) {

1239 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =	1193 HistoryID history_id = entry.history_id();

1240 entries.begin(); iter != entries.end(); ++iter) {	1194 history::URLRow url_row(GURL(entry.url()), history_id);

1241 HistoryID history_id = iter->history_id();	1195 url_row.set_visit_count(entry.visit_count());

1242 GURL url(iter->url());	1196 url_row.set_typed_count(entry.typed_count());

1243 history::URLRow url_row(url, history_id);	1197 url_row.set_last_visit(base::Time::FromInternalValue(entry.last_visit()));

1244 url_row.set_visit_count(iter->visit_count());	1198 if (entry.has_title())

1245 url_row.set_typed_count(iter->typed_count());	1199 url_row.set_title(base::UTF8ToUTF16(entry.title()));

1246 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));	1200 history_info_map_[history_id].url_row = std::move(url_row);

1247 if (iter->has_title()) {

1248 base::string16 title(base::UTF8ToUTF16(iter->title()));

1249 url_row.set_title(title);

1250 }

1251 history_info_map_[history_id].url_row = url_row;

1252	1201

1253 // Restore visits list.	1202 // Restore visits list.

1254 VisitInfoVector visits;	1203 VisitInfoVector visits;

1255 visits.reserve(iter->visits_size());	1204 visits.reserve(entry.visits_size());

1256 for (int i = 0; i < iter->visits_size(); ++i) {	1205 for (const auto& entry_visit : entry.visits()) {

1257 visits.push_back(std::make_pair(	1206 visits.emplace_back(

1258 base::Time::FromInternalValue(iter->visits(i).visit_time()),	1207 base::Time::FromInternalValue(entry_visit.visit_time()),

1259 ui::PageTransitionFromInt(iter->visits(i).transition_type())));	1208 ui::PageTransitionFromInt(entry_visit.transition_type()));

1260 }	1209 }

1261 history_info_map_[history_id].visits = visits;	1210 history_info_map_[history_id].visits = std::move(visits);

1262 }	1211 }

1263 return true;	1212 return true;

1264 }	1213 }

1265	1214

1266 bool URLIndexPrivateData::RestoreWordStartsMap(	1215 bool URLIndexPrivateData::RestoreWordStartsMap(

1267 const InMemoryURLIndexCacheItem& cache) {	1216 const InMemoryURLIndexCacheItem& cache) {

1268 // Note that this function must be called after RestoreHistoryInfoMap() has	1217 // Note that this function must be called after RestoreHistoryInfoMap() has

1269 // been run as the word starts may have to be recalculated from the urls and	1218 // been run as the word starts may have to be recalculated from the urls and

1270 // page titles.	1219 // page titles.

1271 if (cache.has_word_starts_map()) {	1220 if (cache.has_word_starts_map()) {

1272 const WordStartsMapItem& list_item(cache.word_starts_map());	1221 const WordStartsMapItem& list_item(cache.word_starts_map());

1273 uint32_t expected_item_count = list_item.item_count();	1222 uint32_t expected_item_count = list_item.item_count();

1274 uint32_t actual_item_count = list_item.word_starts_map_entry_size();	1223 uint32_t actual_item_count = list_item.word_starts_map_entry_size();

1275 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)	1224 if (actual_item_count == 0 \|\| actual_item_count != expected_item_count)

1276 return false;	1225 return false;

1277 const RepeatedPtrField<WordStartsMapEntry>&	1226 for (const auto& entry : list_item.word_starts_map_entry()) {

1278 entries(list_item.word_starts_map_entry());	1227 HistoryID history_id = entry.history_id();

1279 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =

1280 entries.begin(); iter != entries.end(); ++iter) {

1281 HistoryID history_id = iter->history_id();

1282 RowWordStarts word_starts;	1228 RowWordStarts word_starts;

1283 // Restore the URL word starts.	1229 // Restore the URL word starts.

1284 const RepeatedField<int32_t>& url_starts(iter->url_word_starts());	1230 const RepeatedField<int32_t>& url_starts(entry.url_word_starts());

1285 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin();	1231 word_starts.url_word_starts_ = {url_starts.begin(), url_starts.end()};

1286 jiter != url_starts.end(); ++jiter)	1232

1287 word_starts.url_word_starts_.push_back(*jiter);

1288 // Restore the page title word starts.	1233 // Restore the page title word starts.

1289 const RepeatedField<int32_t>& title_starts(iter->title_word_starts());	1234 const RepeatedField<int32_t>& title_starts(entry.title_word_starts());

1290 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin();	1235 word_starts.title_word_starts_ = {title_starts.begin(),

1291 jiter != title_starts.end(); ++jiter)	1236 title_starts.end()};

1292 word_starts.title_word_starts_.push_back(*jiter);	1237

1293 word_starts_map_[history_id] = word_starts;	1238 word_starts_map_[history_id] = std::move(word_starts);

1294 }	1239 }

1295 } else {	1240 } else {

1296 // Since the cache did not contain any word starts we must rebuild then from	1241 // Since the cache did not contain any word starts we must rebuild then from

1297 // the URL and page titles.	1242 // the URL and page titles.

1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();	1243 for (const auto& entry : history_info_map_) {

1299 iter != history_info_map_.end(); ++iter) {

1300 RowWordStarts word_starts;	1244 RowWordStarts word_starts;

1301 const history::URLRow& row(iter->second.url_row);	1245 const history::URLRow& row(entry.second.url_row);

1302 const base::string16& url =	1246 const base::string16& url =

1303 bookmarks::CleanUpUrlForMatching(row.url(), nullptr);	1247 bookmarks::CleanUpUrlForMatching(row.url(), nullptr);

1304 String16VectorFromString16(url, false, &word_starts.url_word_starts_);	1248 String16VectorFromString16(url, false, &word_starts.url_word_starts_);

1305 const base::string16& title =	1249 const base::string16& title =

1306 bookmarks::CleanUpTitleForMatching(row.title());	1250 bookmarks::CleanUpTitleForMatching(row.title());

1307 String16VectorFromString16(title, false, &word_starts.title_word_starts_);	1251 String16VectorFromString16(title, false, &word_starts.title_word_starts_);

1308 word_starts_map_[iter->first] = word_starts;	1252 word_starts_map_[entry.first] = std::move(word_starts);

1309 }	1253 }

1310 }	1254 }

1311 return true;	1255 return true;

1312 }	1256 }

1313	1257

1314 // static	1258 // static

1315 bool URLIndexPrivateData::URLSchemeIsWhitelisted(	1259 bool URLIndexPrivateData::URLSchemeIsWhitelisted(

1316 const GURL& gurl,	1260 const GURL& gurl,

1317 const std::set<std::string>& whitelist) {	1261 const std::set<std::string>& whitelist) {

1318 return whitelist.find(gurl.scheme()) != whitelist.end();	1262 return whitelist.find(gurl.scheme()) != whitelist.end();

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1381 // First cut: typed count, visit count, recency.	1325 // First cut: typed count, visit count, recency.

1382 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks	1326 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

1383 // recently visited (within the last 12/24 hours) as highly important. Get	1327 // recently visited (within the last 12/24 hours) as highly important. Get

1384 // input from mpearson.	1328 // input from mpearson.

1385 if (r1.typed_count() != r2.typed_count())	1329 if (r1.typed_count() != r2.typed_count())

1386 return (r1.typed_count() > r2.typed_count());	1330 return (r1.typed_count() > r2.typed_count());

1387 if (r1.visit_count() != r2.visit_count())	1331 if (r1.visit_count() != r2.visit_count())

1388 return (r1.visit_count() > r2.visit_count());	1332 return (r1.visit_count() > r2.visit_count());

1389 return (r1.last_visit() > r2.last_visit());	1333 return (r1.last_visit() > r2.last_visit());

1390 }	1334 }

OLD	NEW

« components/omnibox/browser/url_index_private_data.h ('K') | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »