OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <iterator> | 10 #include <iterator> |
(...skipping 19 matching lines...) | |
30 #include "components/omnibox/browser/in_memory_url_index.h" | 30 #include "components/omnibox/browser/in_memory_url_index.h" |
31 #include "components/search_engines/template_url_service.h" | 31 #include "components/search_engines/template_url_service.h" |
32 #include "components/url_formatter/url_formatter.h" | 32 #include "components/url_formatter/url_formatter.h" |
33 | 33 |
34 #if defined(USE_SYSTEM_PROTOBUF) | 34 #if defined(USE_SYSTEM_PROTOBUF) |
35 #include <google/protobuf/repeated_field.h> | 35 #include <google/protobuf/repeated_field.h> |
36 #else | 36 #else |
37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
38 #endif | 38 #endif |
39 | 39 |
40 namespace { | |
41 | |
40 using google::protobuf::RepeatedField; | 42 using google::protobuf::RepeatedField; |
41 using google::protobuf::RepeatedPtrField; | 43 using google::protobuf::RepeatedPtrField; |
42 using in_memory_url_index::InMemoryURLIndexCacheItem; | 44 using in_memory_url_index::InMemoryURLIndexCacheItem; |
43 | 45 |
44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem | 46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem |
45 WordListItem; | 47 WordListItem; |
46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry | 48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry |
47 WordMapEntry; | 49 WordMapEntry; |
48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; | 50 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; |
49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem | 51 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem |
(...skipping 20 matching lines...) | |
70 WordStartsMapEntry; | 72 WordStartsMapEntry; |
71 | 73 |
72 // Algorithm Functions --------------------------------------------------------- | 74 // Algorithm Functions --------------------------------------------------------- |
73 | 75 |
74 // Comparison function for sorting search terms by descending length. | 76 // Comparison function for sorting search terms by descending length. |
75 bool LengthGreater(const base::string16& string_a, | 77 bool LengthGreater(const base::string16& string_a, |
76 const base::string16& string_b) { | 78 const base::string16& string_b) { |
77 return string_a.length() > string_b.length(); | 79 return string_a.length() > string_b.length(); |
78 } | 80 } |
79 | 81 |
82 } // namespace | |
80 | 83 |
81 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- | 84 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- |
82 | 85 |
83 // HistoryDBTask used to update the recent visit data for a particular | 86 // HistoryDBTask used to update the recent visit data for a particular |
84 // row from the history database. | 87 // row from the history database. |
85 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { | 88 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { |
86 public: | 89 public: |
87 explicit UpdateRecentVisitsFromHistoryDBTask( | 90 explicit UpdateRecentVisitsFromHistoryDBTask( |
88 URLIndexPrivateData* private_data, | 91 URLIndexPrivateData* private_data, |
89 history::URLID url_id); | 92 history::URLID url_id); |
(...skipping 90 matching lines...) | |
180 // the index we need individual, lower-cased words, ignoring escapings. For | 183 // the index we need individual, lower-cased words, ignoring escapings. For |
181 // the final filtering we need whitespace separated substrings possibly | 184 // the final filtering we need whitespace separated substrings possibly |
182 // containing escaped characters. | 185 // containing escaped characters. |
183 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); | 186 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
184 base::string16 lower_unescaped_string = net::UnescapeURLComponent( | 187 base::string16 lower_unescaped_string = net::UnescapeURLComponent( |
185 lower_raw_string, | 188 lower_raw_string, |
186 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | | 189 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
187 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); | 190 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
188 | 191 |
189 // Extract individual 'words' (as opposed to 'terms'; see comment in | 192 // Extract individual 'words' (as opposed to 'terms'; see comment in |
190 // HistoryIdSetToScoredMatches()) from the search string. When the user | 193 // HistoryIdsToScoredMatches()) from the search string. When the user types |
191 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", | 194 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", |
192 // "mstone" and "release". | 195 // "mstone" and "release". |
193 String16Vector lower_words( | 196 String16Vector lower_words( |
194 String16VectorFromString16(lower_unescaped_string, false, nullptr)); | 197 String16VectorFromString16(lower_unescaped_string, false, nullptr)); |
195 if (lower_words.empty()) | 198 if (lower_words.empty()) |
196 continue; | 199 continue; |
197 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); | |
198 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_id_set); | |
199 | 200 |
200 ScoredHistoryMatches temp_scored_items; | 201 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); |
201 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string, | 202 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids); |
202 template_url_service, bookmark_model, | 203 |
203 &temp_scored_items); | 204 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, |
204 scored_items.insert(scored_items.end(), temp_scored_items.begin(), | 205 template_url_service, bookmark_model, |
205 temp_scored_items.end()); | 206 &scored_items); |
dyaroshev
2017/02/26 01:12:17
In fixed version of HistoryIdsToScoredMatches() we
| |
206 } | 207 } |
207 // Select and sort only the top |max_matches| results. | 208 // Select and sort only the top |max_matches| results. |
208 if (scored_items.size() > max_matches) { | 209 if (scored_items.size() > max_matches) { |
209 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, | 210 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, |
210 scored_items.end(), | 211 scored_items.end(), |
211 ScoredHistoryMatch::MatchScoreGreater); | 212 ScoredHistoryMatch::MatchScoreGreater); |
212 scored_items.resize(max_matches); | 213 scored_items.resize(max_matches); |
213 } else { | 214 } else { |
214 std::sort(scored_items.begin(), scored_items.end(), | 215 std::sort(scored_items.begin(), scored_items.end(), |
215 ScoredHistoryMatch::MatchScoreGreater); | 216 ScoredHistoryMatch::MatchScoreGreater); |
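The select-then-sort step above follows the standard top-N pattern: std::partial_sort orders only the leading |max_matches| elements and leaves the rest unspecified, which is cheaper than a full sort when the pool is large. A minimal sketch with plain ints and std::greater standing in for ScoredHistoryMatch and MatchScoreGreater:

```cpp
#include <algorithm>
#include <functional>
#include <vector>

// Keep only the best |max_matches| scores, sorted descending. Only the kept
// prefix is sorted; the discarded tail never needs ordering.
std::vector<int> SelectTopMatches(std::vector<int> scores, size_t max_matches) {
  if (scores.size() > max_matches) {
    std::partial_sort(scores.begin(), scores.begin() + max_matches,
                      scores.end(), std::greater<int>());
    scores.resize(max_matches);
  } else {
    std::sort(scores.begin(), scores.end(), std::greater<int>());
  }
  return scores;
}
```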
(...skipping 229 matching lines...) | |
445 // search_term_cache_ | 446 // search_term_cache_ |
446 } | 447 } |
447 | 448 |
448 bool URLIndexPrivateData::Empty() const { | 449 bool URLIndexPrivateData::Empty() const { |
449 return history_info_map_.empty(); | 450 return history_info_map_.empty(); |
450 } | 451 } |
451 | 452 |
452 void URLIndexPrivateData::Clear() { | 453 void URLIndexPrivateData::Clear() { |
453 last_time_rebuilt_from_history_ = base::Time(); | 454 last_time_rebuilt_from_history_ = base::Time(); |
454 word_list_.clear(); | 455 word_list_.clear(); |
455 available_words_.clear(); | 456 available_words_ = {}; |
456 word_map_.clear(); | 457 word_map_.clear(); |
457 char_word_map_.clear(); | 458 char_word_map_.clear(); |
458 word_id_history_map_.clear(); | 459 word_id_history_map_.clear(); |
459 history_id_word_map_.clear(); | 460 history_id_word_map_.clear(); |
460 history_info_map_.clear(); | 461 history_info_map_.clear(); |
461 word_starts_map_.clear(); | 462 word_starts_map_.clear(); |
462 } | 463 } |
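The switch from available_words_.clear() to available_words_ = {} suggests available_words_ is now a container adaptor (later hunks use top()/pop()/push(), consistent with std::stack or std::priority_queue); adaptors expose no clear(), so assigning a default-constructed instance is the usual reset idiom. A small sketch under that assumption:

```cpp
#include <stack>

std::stack<unsigned int> available_words;

void ResetAvailableWords() {
  // Container adaptors have no clear(); assigning a freshly constructed
  // instance empties the stack in one statement.
  available_words = {};
}
```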
463 | 464 |
464 URLIndexPrivateData::~URLIndexPrivateData() {} | 465 URLIndexPrivateData::~URLIndexPrivateData() {} |
465 | 466 |
466 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( | 467 HistoryIDVector URLIndexPrivateData::HistoryIDsFromWords( |
467 const String16Vector& unsorted_words) { | 468 const String16Vector& unsorted_words) { |
469 // This histogram name reflects the historic name of this function. | |
468 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); | 470 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); |
469 // Break the terms down into individual terms (words), get the candidate | 471 // Break the terms down into individual terms (words), get the candidate |
470 // set for each term, and intersect each to get a final candidate list. | 472 // set for each term, and intersect each to get a final candidate list. |
471 // Note that a single 'term' from the user's perspective might be | 473 // Note that a single 'term' from the user's perspective might be |
472 // a string like "http://www.somewebsite.com" which, from our perspective, | 474 // a string like "http://www.somewebsite.com" which, from our perspective, |
473 // is four words: 'http', 'www', 'somewebsite', and 'com'. | 475 // is four words: 'http', 'www', 'somewebsite', and 'com'. |
474 HistoryIDSet history_id_set; | 476 HistoryIDVector history_ids; |
475 String16Vector words(unsorted_words); | 477 String16Vector words(unsorted_words); |
476 // Sort the words longest first, as longer words are likely to narrow down | 478 // Sort the words longest first, as longer words are likely to narrow down |
477 // the results quicker. Also, single character words are the most expensive | 479 // the results quicker. Also, single character words are the most expensive |
478 // to process so save them for last. | 480 // to process so save them for last. |
479 std::sort(words.begin(), words.end(), LengthGreater); | 481 std::sort(words.begin(), words.end(), LengthGreater); |
482 | |
483 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167). | |
480 for (String16Vector::iterator iter = words.begin(); iter != words.end(); | 484 for (String16Vector::iterator iter = words.begin(); iter != words.end(); |
481 ++iter) { | 485 ++iter) { |
482 base::string16 uni_word = *iter; | 486 base::string16 uni_word = *iter; |
Peter Kasting
2017/02/28 00:33:54
Nit: Inline into next statement.
| |
483 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); | 487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); |
484 if (term_history_set.empty()) { | 488 if (term_history_set.empty()) { |
485 history_id_set.clear(); | 489 history_ids.clear(); |
Peter Kasting
2017/02/28 00:33:54
Nit: Just "return HistoryIDVector();" rather than
| |
486 break; | 490 break; |
487 } | 491 } |
488 if (iter == words.begin()) { | 492 if (iter == words.begin()) { |
489 history_id_set.swap(term_history_set); | 493 history_ids = {term_history_set.begin(), term_history_set.end()}; |
490 } else { | 494 } else { |
491 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>( | 495 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids, |
492 history_id_set, term_history_set); | 496 term_history_set); |
493 history_id_set.swap(new_history_id_set); | |
494 } | 497 } |
495 } | 498 } |
496 return history_id_set; | 499 return history_ids; |
497 } | 500 } |
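HistoryIDsFromWords() narrows the candidate pool by intersecting each word's history-ID set into the running result, bailing out as soon as any word has no matches. A rough stand-alone sketch of that shape, using sorted std::vector<int64_t> and std::set_intersection in place of HistoryIDVector and base::STLSetIntersection:

```cpp
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// Intersect per-word candidate lists (each assumed sorted ascending).
std::vector<int64_t> IntersectCandidates(
    const std::vector<std::vector<int64_t>>& per_word_ids) {
  std::vector<int64_t> result;
  for (size_t i = 0; i < per_word_ids.size(); ++i) {
    if (per_word_ids[i].empty())
      return {};  // One empty term set empties the final result.
    if (i == 0) {
      result = per_word_ids[i];
      continue;
    }
    std::vector<int64_t> narrowed;
    std::set_intersection(result.begin(), result.end(),
                          per_word_ids[i].begin(), per_word_ids[i].end(),
                          std::back_inserter(narrowed));
    result.swap(narrowed);
  }
  return result;
}
```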
498 | 501 |
499 bool URLIndexPrivateData::TrimHistoryIdsPool( | 502 bool URLIndexPrivateData::TrimHistoryIdsPool( |
500 HistoryIDSet* history_id_set) const { | 503 HistoryIDVector* history_ids) const { |
501 constexpr size_t kItemsToScoreLimit = 500; | 504 constexpr size_t kItemsToScoreLimit = 500; |
502 if (history_id_set->size() <= kItemsToScoreLimit) | 505 if (history_ids->size() <= kItemsToScoreLimit) |
503 return false; | 506 return false; |
504 | 507 |
505 HistoryIDVector history_ids(history_id_set->begin(), history_id_set->end()); | |
506 | |
507 // Trim down the set by sorting by typed-count, visit-count, and last | 508 // Trim down the set by sorting by typed-count, visit-count, and last |
508 // visit. | 509 // visit. |
509 auto new_end = history_ids.begin() + kItemsToScoreLimit; | 510 auto new_end = history_ids->begin() + kItemsToScoreLimit; |
510 HistoryItemFactorGreater item_factor_functor(history_info_map_); | 511 HistoryItemFactorGreater item_factor_functor(history_info_map_); |
511 | 512 |
512 std::nth_element(history_ids.begin(), new_end, history_ids.end(), | 513 std::nth_element(history_ids->begin(), new_end, history_ids->end(), |
513 item_factor_functor); | 514 item_factor_functor); |
514 history_ids.erase(new_end, history_ids.end()); | 515 history_ids->erase(new_end, history_ids->end()); |
515 | 516 |
516 *history_id_set = {history_ids.begin(), history_ids.end()}; | |
517 return true; | 517 return true; |
518 } | 518 } |
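TrimHistoryIdsPool() only needs the best kItemsToScoreLimit candidates, not a full ordering, which is why std::nth_element (linear on average) is used instead of a sort before erasing the tail. A simplified sketch with ints and std::greater standing in for the history IDs and HistoryItemFactorGreater:

```cpp
#include <algorithm>
#include <functional>
#include <vector>

bool TrimPool(std::vector<int>* ids, size_t limit) {
  if (ids->size() <= limit)
    return false;
  auto new_end = ids->begin() + limit;
  // Partition so the |limit| "best" elements (by the comparator) come first,
  // in unspecified order, then drop everything after them.
  std::nth_element(ids->begin(), new_end, ids->end(), std::greater<int>());
  ids->erase(new_end, ids->end());
  return true;
}
```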
519 | 519 |
520 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( | 520 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( |
521 const base::string16& term) { | 521 const base::string16& term) { |
522 if (term.empty()) | 522 if (term.empty()) |
523 return HistoryIDSet(); | 523 return HistoryIDSet(); |
524 | 524 |
525 // TODO(mrossetti): Consider optimizing for very common terms such as | 525 // TODO(mrossetti): Consider optimizing for very common terms such as |
526 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently | 526 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently |
(...skipping 47 matching lines...) | |
574 | 574 |
575 // Reduce the word set with any leftover, unprocessed characters. | 575 // Reduce the word set with any leftover, unprocessed characters. |
576 if (!unique_chars.empty()) { | 576 if (!unique_chars.empty()) { |
577 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); | 577 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); |
578 // We might come up empty on the leftovers. | 578 // We might come up empty on the leftovers. |
579 if (leftover_set.empty()) { | 579 if (leftover_set.empty()) { |
580 search_term_cache_[term] = SearchTermCacheItem(); | 580 search_term_cache_[term] = SearchTermCacheItem(); |
581 return HistoryIDSet(); | 581 return HistoryIDSet(); |
582 } | 582 } |
583 // Or there may not have been a prefix from which to start. | 583 // Or there may not have been a prefix from which to start. |
584 if (prefix_chars.empty()) { | 584 word_id_set = prefix_chars.empty() ? std::move(leftover_set) |
585 word_id_set.swap(leftover_set); | 585 : base::STLSetIntersection<WordIDSet>( |
586 } else { | 586 word_id_set, leftover_set); |
587 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( | |
588 word_id_set, leftover_set); | |
589 word_id_set.swap(new_word_id_set); | |
590 } | |
591 } | 587 } |
592 | 588 |
593 // We must filter the word list because the resulting word set surely | 589 // We must filter the word list because the resulting word set surely |
594 // contains words which do not have the search term as a proper subset. | 590 // contains words which do not have the search term as a proper subset. |
595 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); | 591 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); |
596 word_set_iter != word_id_set.end(); ) { | 592 word_set_iter != word_id_set.end(); ) { |
597 if (word_list_[*word_set_iter].find(term) == base::string16::npos) | 593 if (word_list_[*word_set_iter].find(term) == base::string16::npos) |
598 word_set_iter = word_id_set.erase(word_set_iter); | 594 word_set_iter = word_id_set.erase(word_set_iter); |
599 else | 595 else |
600 ++word_set_iter; | 596 ++word_set_iter; |
601 } | 597 } |
602 } else { | 598 } else { |
603 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); | 599 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); |
604 } | 600 } |
605 | 601 |
606 // If any words resulted then we can compose a set of history IDs by unioning | 602 // If any words resulted then we can compose a set of history IDs by unioning |
607 // the sets from each word. | 603 // the sets from each word. |
608 HistoryIDSet history_id_set; | 604 HistoryIDSet history_id_set; |
609 if (!word_id_set.empty()) { | 605 for (WordID word_id : word_id_set) { |
610 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 606 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); |
611 word_id_iter != word_id_set.end(); ++word_id_iter) { | 607 if (word_iter != word_id_history_map_.end()) { |
612 WordID word_id = *word_id_iter; | 608 HistoryIDSet& word_history_id_set(word_iter->second); |
613 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); | 609 history_id_set.insert(word_history_id_set.begin(), |
614 if (word_iter != word_id_history_map_.end()) { | 610 word_history_id_set.end()); |
615 HistoryIDSet& word_history_id_set(word_iter->second); | |
616 history_id_set.insert(word_history_id_set.begin(), | |
617 word_history_id_set.end()); | |
618 } | |
619 } | 611 } |
620 } | 612 } |
621 | 613 |
622 // Record a new cache entry for this word if the term is longer than | 614 // Record a new cache entry for this word if the term is longer than |
623 // a single character. | 615 // a single character. |
624 if (term_length > 1) | 616 if (term_length > 1) |
625 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); | 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); |
626 | 618 |
627 return history_id_set; | 619 return history_id_set; |
628 } | 620 } |
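The closing loop of HistoryIDsForTerm() unions the history-ID set of every surviving word into one result. A compact illustration of that accumulation, with std::map/std::set standing in for WordIDHistoryMap and HistoryIDSet:

```cpp
#include <cstdint>
#include <map>
#include <set>

std::set<int64_t> UnionHistoryIds(
    const std::set<int>& word_ids,
    const std::map<int, std::set<int64_t>>& word_to_history) {
  std::set<int64_t> history_ids;
  for (int word_id : word_ids) {
    auto it = word_to_history.find(word_id);
    if (it != word_to_history.end())
      history_ids.insert(it->second.begin(), it->second.end());
  }
  return history_ids;
}
```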
629 | 621 |
630 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( | 622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( |
631 const Char16Set& term_chars) { | 623 const Char16Set& term_chars) { |
624 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167). | |
625 | |
632 WordIDSet word_id_set; | 626 WordIDSet word_id_set; |
633 for (Char16Set::const_iterator c_iter = term_chars.begin(); | 627 for (Char16Set::const_iterator c_iter = term_chars.begin(); |
634 c_iter != term_chars.end(); ++c_iter) { | 628 c_iter != term_chars.end(); ++c_iter) { |
635 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); | 629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); |
636 if (char_iter == char_word_map_.end()) { | 630 if (char_iter == char_word_map_.end()) { |
637 // A character was not found so there are no matching results: bail. | 631 // A character was not found so there are no matching results: bail. |
638 word_id_set.clear(); | 632 word_id_set.clear(); |
Peter Kasting
2017/02/28 00:33:54
Nit: Same comment as above
| |
639 break; | 633 break; |
640 } | 634 } |
641 WordIDSet& char_word_id_set(char_iter->second); | 635 WordIDSet& char_word_id_set(char_iter->second); |
Peter Kasting
2017/02/28 00:33:54
Nit: Can be const
| |
642 // It is possible for there to no longer be any words associated with | 636 // It is possible for there to no longer be any words associated with |
643 // a particular character. Give up in that case. | 637 // a particular character. Give up in that case. |
644 if (char_word_id_set.empty()) { | 638 if (char_word_id_set.empty()) { |
645 word_id_set.clear(); | 639 word_id_set.clear(); |
646 break; | 640 break; |
647 } | 641 } |
648 | 642 |
649 if (c_iter == term_chars.begin()) { | 643 if (c_iter == term_chars.begin()) { |
650 // First character results becomes base set of results. | 644 // First character results becomes base set of results. |
Peter Kasting
2017/02/28 00:33:55
Nit: These comments just restate the code, remove.
| |
651 word_id_set = char_word_id_set; | 645 word_id_set = char_word_id_set; |
652 } else { | 646 } else { |
653 // Subsequent character results get intersected in. | 647 // Subsequent character results get intersected in. |
654 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( | 648 word_id_set = |
655 word_id_set, char_word_id_set); | 649 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set); |
656 word_id_set.swap(new_word_id_set); | |
657 } | 650 } |
658 } | 651 } |
659 return word_id_set; | 652 return word_id_set; |
660 } | 653 } |
661 | 654 |
662 void URLIndexPrivateData::HistoryIdSetToScoredMatches( | 655 void URLIndexPrivateData::HistoryIdsToScoredMatches( |
663 HistoryIDSet history_id_set, | 656 HistoryIDVector history_ids, |
664 const base::string16& lower_raw_string, | 657 const base::string16& lower_raw_string, |
665 const TemplateURLService* template_url_service, | 658 const TemplateURLService* template_url_service, |
666 bookmarks::BookmarkModel* bookmark_model, | 659 bookmarks::BookmarkModel* bookmark_model, |
667 ScoredHistoryMatches* scored_items) const { | 660 ScoredHistoryMatches* scored_items) const { |
668 if (history_id_set.empty()) | 661 if (history_ids.empty()) |
669 return; | 662 return; |
670 | 663 |
671 // Break up the raw search string (complete with escaped URL elements) into | 664 // Break up the raw search string (complete with escaped URL elements) into |
672 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). | 665 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). |
673 // We only want to break up the search string on 'true' whitespace rather than | 666 // We only want to break up the search string on 'true' whitespace rather than |
674 // escaped whitespace. For example, when the user types | 667 // escaped whitespace. For example, when the user types |
675 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and | 668 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and |
676 // "release". | 669 // "release". |
677 String16Vector lower_raw_terms = | 670 String16Vector lower_raw_terms = |
678 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, | 671 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, |
679 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); | 672 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
680 | 673 |
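The 'terms' described above are produced by splitting only on literal whitespace, so an escaped space such as "%20" stays inside a single term. A tiny stand-alone illustration, with std::istringstream as a stand-in for base::SplitString with SPLIT_WANT_NONEMPTY:

```cpp
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> SplitTerms(const std::string& lower_raw_string) {
  std::vector<std::string> terms;
  std::istringstream stream(lower_raw_string);
  std::string term;
  while (stream >> term)  // Skips runs of whitespace; "%20" stays intact.
    terms.push_back(term);
  return terms;
}

// SplitTerms("colspec=id%20mstone release")
//     -> {"colspec=id%20mstone", "release"}
```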
681 // Don't score matches when there are no terms to score against. (It's | 674 // Don't score matches when there are no terms to score against. (It's |
682 // possible that the word break iterator that extracts words to search for in | 675 // possible that the word break iterator that extracts words to search for in |
683 // the database allows some whitespace "words" whereas SplitString excludes a | 676 // the database allows some whitespace "words" whereas SplitString excludes a |
684 // long list of whitespace.) One could write a scoring function that gives a | 677 // long list of whitespace.) One could write a scoring function that gives a |
685 // reasonable order to matches when there are no terms (i.e., all the words | 678 // reasonable order to matches when there are no terms (i.e., all the words |
686 // are some form of whitespace), but this is such a rare edge case that it's | 679 // are some form of whitespace), but this is such a rare edge case that it's |
687 // not worth the time. | 680 // not worth the time. |
688 if (lower_raw_terms.empty()) | 681 if (lower_raw_terms.empty()) |
689 return; | 682 return; |
690 | 683 |
691 WordStarts lower_terms_to_word_starts_offsets; | 684 WordStarts lower_terms_to_word_starts_offsets; |
692 CalculateWordStartsOffsets(lower_raw_terms, | 685 CalculateWordStartsOffsets(lower_raw_terms, |
693 &lower_terms_to_word_starts_offsets); | 686 &lower_terms_to_word_starts_offsets); |
694 | 687 |
695 // Filter bad matches and other matches we don't want to display. | 688 // Filter bad matches and other matches we don't want to display. |
696 for (auto it = history_id_set.begin();;) { | 689 auto filter = [this, template_url_service](const HistoryID history_id) { |
697 it = std::find_if(it, history_id_set.end(), | 690 return ShouldFilter(history_id, template_url_service); |
698 [this, template_url_service](const HistoryID history_id) { | 691 }; |
699 return ShouldFilter(history_id, template_url_service); | 692 history_ids.erase( |
700 }); | 693 std::remove_if(history_ids.begin(), history_ids.end(), filter), |
701 if (it == history_id_set.end()) | 694 history_ids.end()); |
702 break; | |
703 it = history_id_set.erase(it); | |
704 } | |
705 | 695 |
706 // Score the matches. | 696 // Score the matches. |
707 const size_t num_matches = history_id_set.size(); | 697 const size_t num_matches = history_ids.size(); |
708 const base::Time now = base::Time::Now(); | 698 const base::Time now = base::Time::Now(); |
709 std::transform( | |
710 history_id_set.begin(), history_id_set.end(), | |
711 std::back_inserter(*scored_items), [&](const HistoryID history_id) { | |
712 auto hist_pos = history_info_map_.find(history_id); | |
713 const history::URLRow& hist_item = hist_pos->second.url_row; | |
714 auto starts_pos = word_starts_map_.find(history_id); | |
715 DCHECK(starts_pos != word_starts_map_.end()); | |
716 return ScoredHistoryMatch( | |
717 hist_item, hist_pos->second.visits, lower_raw_string, | |
718 lower_raw_terms, lower_terms_to_word_starts_offsets, | |
719 starts_pos->second, | |
720 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), | |
721 num_matches, now); | |
722 }); | |
723 | 699 |
dyaroshev
2017/02/26 01:12:17
First transforming and then removing seems unneces
| |
724 // Filter all matches that ended up scoring 0. (These are usually matches | 700 for (HistoryID history_id : history_ids) { |
725 // which didn't match the user's raw terms.) | 701 auto hist_pos = history_info_map_.find(history_id); |
726 scored_items->erase(std::remove_if(scored_items->begin(), scored_items->end(), | 702 const history::URLRow& hist_item = hist_pos->second.url_row; |
727 [](const ScoredHistoryMatch& match) { | 703 auto starts_pos = word_starts_map_.find(history_id); |
728 return match.raw_score == 0; | 704 DCHECK(starts_pos != word_starts_map_.end()); |
729 }), | 705 ScoredHistoryMatch new_scored_match( |
730 scored_items->end()); | 706 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms, |
707 lower_terms_to_word_starts_offsets, starts_pos->second, | |
708 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), | |
709 num_matches, now); | |
710 // Filter new matches that ended up scoring 0. (These are usually matches | |
711 // which didn't match the user's raw terms.) | |
712 if (new_scored_match.raw_score != 0) | |
Peter Kasting
2017/02/28 00:33:54
Nit: > 0?
| |
713 scored_items->push_back(std::move(new_scored_match)); | |
714 } | |
731 } | 715 } |
732 | 716 |
733 // static | 717 // static |
734 void URLIndexPrivateData::CalculateWordStartsOffsets( | 718 void URLIndexPrivateData::CalculateWordStartsOffsets( |
735 const String16Vector& lower_terms, | 719 const String16Vector& lower_terms, |
736 WordStarts* lower_terms_to_word_starts_offsets) { | 720 WordStarts* lower_terms_to_word_starts_offsets) { |
737 // Calculate offsets for each term. For instance, the offset for | 721 // Calculate offsets for each term. For instance, the offset for |
738 // ".net" should be 1, indicating that the actual word-part of the term | 722 // ".net" should be 1, indicating that the actual word-part of the term |
739 // starts at offset 1. | 723 // starts at offset 1. |
740 lower_terms_to_word_starts_offsets->resize(lower_terms.size(), 0u); | 724 lower_terms_to_word_starts_offsets->resize(lower_terms.size(), 0u); |
(...skipping 71 matching lines...) | |
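The body of CalculateWordStartsOffsets() is elided in this diff, but the ".net" example in its comment boils down to counting leading non-word characters. A simplified, assumption-laden sketch of that idea (ASCII-only; the real code uses the same word-breaking rules as the indexer):

```cpp
#include <cctype>
#include <cstddef>
#include <string>

size_t WordStartOffset(const std::string& term) {
  size_t offset = 0;
  while (offset < term.size() &&
         !std::isalnum(static_cast<unsigned char>(term[offset])))
    ++offset;  // Skip leading punctuation such as '.'.
  return offset;  // ".net" -> 1, "net" -> 0.
}
```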
812 HistoryID history_id = static_cast<HistoryID>(row.id()); | 796 HistoryID history_id = static_cast<HistoryID>(row.id()); |
813 // Split URL into individual, unique words then add in the title words. | 797 // Split URL into individual, unique words then add in the title words. |
814 const GURL& gurl(row.url()); | 798 const GURL& gurl(row.url()); |
815 const base::string16& url = | 799 const base::string16& url = |
816 bookmarks::CleanUpUrlForMatching(gurl, nullptr); | 800 bookmarks::CleanUpUrlForMatching(gurl, nullptr); |
817 String16Set url_words = String16SetFromString16(url, | 801 String16Set url_words = String16SetFromString16(url, |
818 word_starts ? &word_starts->url_word_starts_ : nullptr); | 802 word_starts ? &word_starts->url_word_starts_ : nullptr); |
819 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); | 803 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); |
820 String16Set title_words = String16SetFromString16(title, | 804 String16Set title_words = String16SetFromString16(title, |
821 word_starts ? &word_starts->title_word_starts_ : nullptr); | 805 word_starts ? &word_starts->title_word_starts_ : nullptr); |
822 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); | 806 for (const auto& word : |
823 for (String16Set::iterator word_iter = words.begin(); | 807 base::STLSetUnion<String16Set>(url_words, title_words)) |
824 word_iter != words.end(); ++word_iter) | 808 AddWordToIndex(word, history_id); |
825 AddWordToIndex(*word_iter, history_id); | |
826 | 809 |
827 search_term_cache_.clear(); // Invalidate the term cache. | 810 search_term_cache_.clear(); // Invalidate the term cache. |
828 } | 811 } |
829 | 812 |
830 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, | 813 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, |
831 HistoryID history_id) { | 814 HistoryID history_id) { |
832 WordMap::iterator word_pos = word_map_.find(term); | 815 WordMap::iterator word_pos; |
833 if (word_pos != word_map_.end()) | 816 bool is_new; |
834 UpdateWordHistory(word_pos->second, history_id); | 817 std::tie(word_pos, is_new) = word_map_.emplace(term, WordID()); |
835 else | 818 |
836 AddWordHistory(term, history_id); | 819 // Adding a new word (i.e. a word that is not already in the word index). |
820 if (is_new) { | |
821 word_pos->second = AddNewWordToWordList(term); | |
822 | |
823 // For each character in the newly added word add the word to the character | |
824 // index. | |
825 for (base::char16 uni_char : Char16SetFromString16(term)) | |
826 char_word_map_[uni_char].insert(word_pos->second); | |
827 } | |
828 | |
829 word_id_history_map_[word_pos->second].insert(history_id); | |
830 history_id_word_map_[history_id].insert(word_pos->second); | |
837 } | 831 } |
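The rewritten AddWordToIndex() replaces the find-then-branch pattern with a single map::emplace, whose returned bool says whether the word was newly inserted. A minimal sketch of that idiom, with plain std types standing in for WordMap/WordID:

```cpp
#include <map>
#include <string>
#include <tuple>

std::map<std::string, int> word_map;

int GetOrAddWordId(const std::string& term, int next_id) {
  std::map<std::string, int>::iterator pos;
  bool is_new;
  // emplace() looks up and, only if absent, inserts in one step.
  std::tie(pos, is_new) = word_map.emplace(term, 0);
  if (is_new)
    pos->second = next_id;  // Assign a real ID only for a brand-new word.
  return pos->second;
}
```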
838 | 832 |
839 void URLIndexPrivateData::AddWordHistory(const base::string16& term, | 833 WordID URLIndexPrivateData::AddNewWordToWordList(const base::string16& term) { |
840 HistoryID history_id) { | |
841 WordID word_id = word_list_.size(); | 834 WordID word_id = word_list_.size(); |
842 if (available_words_.empty()) { | 835 if (available_words_.empty()) { |
843 word_list_.push_back(term); | 836 word_list_.push_back(term); |
844 } else { | 837 return word_id; |
845 word_id = *(available_words_.begin()); | |
846 word_list_[word_id] = term; | |
847 available_words_.erase(word_id); | |
848 } | 838 } |
849 word_map_[term] = word_id; | |
850 | 839 |
851 HistoryIDSet history_id_set; | 840 word_id = available_words_.top(); |
852 history_id_set.insert(history_id); | 841 available_words_.pop(); |
853 word_id_history_map_[word_id] = history_id_set; | 842 word_list_[word_id] = term; |
854 AddToHistoryIDWordMap(history_id, word_id); | 843 return word_id; |
855 | |
856 // For each character in the newly added word (i.e. a word that is not | |
857 // already in the word index), add the word to the character index. | |
858 Char16Set characters = Char16SetFromString16(term); | |
859 for (Char16Set::iterator uni_char_iter = characters.begin(); | |
860 uni_char_iter != characters.end(); ++uni_char_iter) { | |
861 base::char16 uni_char = *uni_char_iter; | |
862 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); | |
863 if (char_iter != char_word_map_.end()) { | |
864 // Update existing entry in the char/word index. | |
865 WordIDSet& word_id_set(char_iter->second); | |
866 word_id_set.insert(word_id); | |
867 } else { | |
868 // Create a new entry in the char/word index. | |
869 WordIDSet word_id_set; | |
870 word_id_set.insert(word_id); | |
871 char_word_map_[uni_char] = word_id_set; | |
872 } | |
873 } | |
874 } | |
875 | |
876 void URLIndexPrivateData::UpdateWordHistory(WordID word_id, | |
877 HistoryID history_id) { | |
878 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); | |
879 DCHECK(history_pos != word_id_history_map_.end()); | |
880 HistoryIDSet& history_id_set(history_pos->second); | |
881 history_id_set.insert(history_id); | |
882 AddToHistoryIDWordMap(history_id, word_id); | |
883 } | |
884 | |
885 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, | |
886 WordID word_id) { | |
887 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); | |
888 if (iter != history_id_word_map_.end()) { | |
889 WordIDSet& word_id_set(iter->second); | |
890 word_id_set.insert(word_id); | |
891 } else { | |
892 WordIDSet word_id_set; | |
893 word_id_set.insert(word_id); | |
894 history_id_word_map_[history_id] = word_id_set; | |
895 } | |
896 } | 844 } |
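AddNewWordToWordList() together with RemoveRowWordsFromIndex() implements a small free-list: slots in word_list_ freed by removed words are pushed onto available_words_ and reused before the vector grows. A self-contained sketch of that recycling scheme, with simplified types:

```cpp
#include <cstddef>
#include <stack>
#include <string>
#include <vector>

std::vector<std::string> word_list;
std::stack<size_t> available_words;

size_t AddWord(const std::string& term) {
  if (available_words.empty()) {
    word_list.push_back(term);   // No free slot: append a new one.
    return word_list.size() - 1;
  }
  size_t word_id = available_words.top();
  available_words.pop();
  word_list[word_id] = term;     // Reuse a previously freed slot.
  return word_id;
}

void RemoveWord(size_t word_id) {
  word_list[word_id].clear();
  available_words.push(word_id);  // Make the slot available again.
}
```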
897 | 845 |
898 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { | 846 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { |
899 RemoveRowWordsFromIndex(row); | 847 RemoveRowWordsFromIndex(row); |
900 HistoryID history_id = static_cast<HistoryID>(row.id()); | 848 HistoryID history_id = static_cast<HistoryID>(row.id()); |
901 history_info_map_.erase(history_id); | 849 history_info_map_.erase(history_id); |
902 word_starts_map_.erase(history_id); | 850 word_starts_map_.erase(history_id); |
903 } | 851 } |
904 | 852 |
905 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { | 853 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { |
906 // Remove the entries in history_id_word_map_ and word_id_history_map_ for | 854 // Remove the entries in history_id_word_map_ and word_id_history_map_ for |
907 // this row. | 855 // this row. |
908 HistoryID history_id = static_cast<HistoryID>(row.id()); | 856 HistoryID history_id = static_cast<HistoryID>(row.id()); |
909 WordIDSet word_id_set = history_id_word_map_[history_id]; | 857 WordIDSet word_id_set = history_id_word_map_[history_id]; |
910 history_id_word_map_.erase(history_id); | 858 history_id_word_map_.erase(history_id); |
911 | 859 |
912 // Reconcile any changes to word usage. | 860 // Reconcile any changes to word usage. |
913 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 861 for (WordID word_id : word_id_set) { |
914 word_id_iter != word_id_set.end(); ++word_id_iter) { | 862 auto word_id_history_map_iter = word_id_history_map_.find(word_id); |
915 WordID word_id = *word_id_iter; | 863 DCHECK(word_id_history_map_iter != word_id_history_map_.end()); |
916 word_id_history_map_[word_id].erase(history_id); | 864 |
917 if (!word_id_history_map_[word_id].empty()) | 865 word_id_history_map_iter->second.erase(history_id); |
918 continue; // The word is still in use. | 866 if (!word_id_history_map_iter->second.empty()) |
867 continue; | |
919 | 868 |
920 // The word is no longer in use. Reconcile any changes to character usage. | 869 // The word is no longer in use. Reconcile any changes to character usage. |
921 base::string16 word = word_list_[word_id]; | 870 base::string16 word = word_list_[word_id]; |
922 Char16Set characters = Char16SetFromString16(word); | 871 for (base::char16 uni_char : Char16SetFromString16(word)) { |
923 for (Char16Set::iterator uni_char_iter = characters.begin(); | 872 auto char_word_map_iter = char_word_map_.find(uni_char); |
924 uni_char_iter != characters.end(); ++uni_char_iter) { | 873 char_word_map_iter->second.erase(word_id); |
925 base::char16 uni_char = *uni_char_iter; | 874 if (char_word_map_iter->second.empty()) |
926 char_word_map_[uni_char].erase(word_id); | 875 char_word_map_.erase(char_word_map_iter); |
927 if (char_word_map_[uni_char].empty()) | |
928 char_word_map_.erase(uni_char); // No longer in use. | |
929 } | 876 } |
930 | 877 |
931 // Complete the removal of references to the word. | 878 // Complete the removal of references to the word. |
932 word_id_history_map_.erase(word_id); | 879 word_id_history_map_.erase(word_id_history_map_iter); |
933 word_map_.erase(word); | 880 word_map_.erase(word); |
934 word_list_[word_id] = base::string16(); | 881 word_list_[word_id] = base::string16(); |
935 available_words_.insert(word_id); | 882 available_words_.push(word_id); |
936 } | 883 } |
937 } | 884 } |
938 | 885 |
939 void URLIndexPrivateData::ResetSearchTermCache() { | 886 void URLIndexPrivateData::ResetSearchTermCache() { |
940 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); | 887 for (auto& item : search_term_cache_) |
941 iter != search_term_cache_.end(); ++iter) | 888 item.second.used_ = false; |
942 iter->second.used_ = false; | |
943 } | 889 } |
944 | 890 |
945 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { | 891 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { |
946 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 892 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
947 InMemoryURLIndexCacheItem index_cache; | 893 InMemoryURLIndexCacheItem index_cache; |
948 SavePrivateData(&index_cache); | 894 SavePrivateData(&index_cache); |
949 std::string data; | 895 std::string data; |
950 if (!index_cache.SerializeToString(&data)) { | 896 if (!index_cache.SerializeToString(&data)) { |
951 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; | 897 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; |
952 return false; | 898 return false; |
(...skipping 24 matching lines...) | |
977 SaveWordIDHistoryMap(cache); | 923 SaveWordIDHistoryMap(cache); |
978 SaveHistoryInfoMap(cache); | 924 SaveHistoryInfoMap(cache); |
979 SaveWordStartsMap(cache); | 925 SaveWordStartsMap(cache); |
980 } | 926 } |
981 | 927 |
982 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { | 928 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
983 if (word_list_.empty()) | 929 if (word_list_.empty()) |
984 return; | 930 return; |
985 WordListItem* list_item = cache->mutable_word_list(); | 931 WordListItem* list_item = cache->mutable_word_list(); |
986 list_item->set_word_count(word_list_.size()); | 932 list_item->set_word_count(word_list_.size()); |
987 for (String16Vector::const_iterator iter = word_list_.begin(); | 933 for (const base::string16& word : word_list_) |
988 iter != word_list_.end(); ++iter) | 934 list_item->add_word(base::UTF16ToUTF8(word)); |
989 list_item->add_word(base::UTF16ToUTF8(*iter)); | |
990 } | 935 } |
991 | 936 |
992 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { | 937 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { |
993 if (word_map_.empty()) | 938 if (word_map_.empty()) |
994 return; | 939 return; |
995 WordMapItem* map_item = cache->mutable_word_map(); | 940 WordMapItem* map_item = cache->mutable_word_map(); |
996 map_item->set_item_count(word_map_.size()); | 941 map_item->set_item_count(word_map_.size()); |
997 for (WordMap::const_iterator iter = word_map_.begin(); | 942 for (const auto& elem : word_map_) { |
998 iter != word_map_.end(); ++iter) { | |
999 WordMapEntry* map_entry = map_item->add_word_map_entry(); | 943 WordMapEntry* map_entry = map_item->add_word_map_entry(); |
1000 map_entry->set_word(base::UTF16ToUTF8(iter->first)); | 944 map_entry->set_word(base::UTF16ToUTF8(elem.first)); |
1001 map_entry->set_word_id(iter->second); | 945 map_entry->set_word_id(elem.second); |
1002 } | 946 } |
1003 } | 947 } |
1004 | 948 |
1005 void URLIndexPrivateData::SaveCharWordMap( | 949 void URLIndexPrivateData::SaveCharWordMap( |
1006 InMemoryURLIndexCacheItem* cache) const { | 950 InMemoryURLIndexCacheItem* cache) const { |
1007 if (char_word_map_.empty()) | 951 if (char_word_map_.empty()) |
1008 return; | 952 return; |
1009 CharWordMapItem* map_item = cache->mutable_char_word_map(); | 953 CharWordMapItem* map_item = cache->mutable_char_word_map(); |
1010 map_item->set_item_count(char_word_map_.size()); | 954 map_item->set_item_count(char_word_map_.size()); |
1011 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); | 955 for (const auto& entry : char_word_map_) { |
1012 iter != char_word_map_.end(); ++iter) { | |
1013 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); | 956 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); |
1014 map_entry->set_char_16(iter->first); | 957 map_entry->set_char_16(entry.first); |
1015 const WordIDSet& word_id_set(iter->second); | 958 const WordIDSet& word_id_set = entry.second; |
1016 map_entry->set_item_count(word_id_set.size()); | 959 map_entry->set_item_count(word_id_set.size()); |
1017 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); | 960 for (WordID word_id : word_id_set) |
1018 set_iter != word_id_set.end(); ++set_iter) | 961 map_entry->add_word_id(word_id); |
1019 map_entry->add_word_id(*set_iter); | |
1020 } | 962 } |
1021 } | 963 } |
1022 | 964 |
1023 void URLIndexPrivateData::SaveWordIDHistoryMap( | 965 void URLIndexPrivateData::SaveWordIDHistoryMap( |
1024 InMemoryURLIndexCacheItem* cache) const { | 966 InMemoryURLIndexCacheItem* cache) const { |
1025 if (word_id_history_map_.empty()) | 967 if (word_id_history_map_.empty()) |
1026 return; | 968 return; |
1027 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); | 969 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); |
1028 map_item->set_item_count(word_id_history_map_.size()); | 970 map_item->set_item_count(word_id_history_map_.size()); |
1029 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); | 971 for (const auto& entry : word_id_history_map_) { |
1030 iter != word_id_history_map_.end(); ++iter) { | |
1031 WordIDHistoryMapEntry* map_entry = | 972 WordIDHistoryMapEntry* map_entry = |
1032 map_item->add_word_id_history_map_entry(); | 973 map_item->add_word_id_history_map_entry(); |
1033 map_entry->set_word_id(iter->first); | 974 map_entry->set_word_id(entry.first); |
1034 const HistoryIDSet& history_id_set(iter->second); | 975 const HistoryIDSet& history_id_set = entry.second; |
1035 map_entry->set_item_count(history_id_set.size()); | 976 map_entry->set_item_count(history_id_set.size()); |
1036 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); | 977 for (HistoryID history_id : history_id_set) |
1037 set_iter != history_id_set.end(); ++set_iter) | 978 map_entry->add_history_id(history_id); |
1038 map_entry->add_history_id(*set_iter); | |
1039 } | 979 } |
1040 } | 980 } |
1041 | 981 |
1042 void URLIndexPrivateData::SaveHistoryInfoMap( | 982 void URLIndexPrivateData::SaveHistoryInfoMap( |
1043 InMemoryURLIndexCacheItem* cache) const { | 983 InMemoryURLIndexCacheItem* cache) const { |
1044 if (history_info_map_.empty()) | 984 if (history_info_map_.empty()) |
1045 return; | 985 return; |
1046 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 986 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
1047 map_item->set_item_count(history_info_map_.size()); | 987 map_item->set_item_count(history_info_map_.size()); |
1048 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 988 for (const auto& entry : history_info_map_) { |
1049 iter != history_info_map_.end(); ++iter) { | |
1050 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 989 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
1051 map_entry->set_history_id(iter->first); | 990 map_entry->set_history_id(entry.first); |
1052 const history::URLRow& url_row(iter->second.url_row); | 991 const history::URLRow& url_row = entry.second.url_row; |
1053 // Note: We only save information that contributes to the index so there | 992 // Note: We only save information that contributes to the index so there |
1054 // is no need to save search_term_cache_ (not persistent). | 993 // is no need to save search_term_cache_ (not persistent). |
1055 map_entry->set_visit_count(url_row.visit_count()); | 994 map_entry->set_visit_count(url_row.visit_count()); |
1056 map_entry->set_typed_count(url_row.typed_count()); | 995 map_entry->set_typed_count(url_row.typed_count()); |
1057 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 996 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1058 map_entry->set_url(url_row.url().spec()); | 997 map_entry->set_url(url_row.url().spec()); |
1059 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); | 998 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); |
1060 const VisitInfoVector& visits(iter->second.visits); | 999 for (const auto& visit : entry.second.visits) { |
1061 for (VisitInfoVector::const_iterator visit_iter = visits.begin(); | |
1062 visit_iter != visits.end(); ++visit_iter) { | |
1063 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); | 1000 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); |
1064 visit_info->set_visit_time(visit_iter->first.ToInternalValue()); | 1001 visit_info->set_visit_time(visit.first.ToInternalValue()); |
1065 visit_info->set_transition_type(visit_iter->second); | 1002 visit_info->set_transition_type(visit.second); |
1066 } | 1003 } |
1067 } | 1004 } |
1068 } | 1005 } |
1069 | 1006 |
1070 void URLIndexPrivateData::SaveWordStartsMap( | 1007 void URLIndexPrivateData::SaveWordStartsMap( |
1071 InMemoryURLIndexCacheItem* cache) const { | 1008 InMemoryURLIndexCacheItem* cache) const { |
1072 if (word_starts_map_.empty()) | 1009 if (word_starts_map_.empty()) |
1073 return; | 1010 return; |
1074 // For unit testing: Enable saving of the cache as an earlier version to | 1011 // For unit testing: Enable saving of the cache as an earlier version to |
1075 // allow testing of cache file upgrading in ReadFromFile(). | 1012 // allow testing of cache file upgrading in ReadFromFile(). |
1076 // TODO(mrossetti): Instead of intruding on production code with this kind of | 1013 // TODO(mrossetti): Instead of intruding on production code with this kind of |
1077 // test harness, save a copy of an older version cache with known results. | 1014 // test harness, save a copy of an older version cache with known results. |
1078 // Implement this when switching the caching over to SQLite. | 1015 // Implement this when switching the caching over to SQLite. |
1079 if (saved_cache_version_ < 1) | 1016 if (saved_cache_version_ < 1) |
1080 return; | 1017 return; |
1081 | 1018 |
1082 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); | 1019 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); |
1083 map_item->set_item_count(word_starts_map_.size()); | 1020 map_item->set_item_count(word_starts_map_.size()); |
1084 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); | 1021 for (const auto& entry : word_starts_map_) { |
1085 iter != word_starts_map_.end(); ++iter) { | |
1086 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); | 1022 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); |
1087 map_entry->set_history_id(iter->first); | 1023 map_entry->set_history_id(entry.first); |
1088 const RowWordStarts& word_starts(iter->second); | 1024 const RowWordStarts& word_starts = entry.second; |
1089 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); | 1025 for (auto url_word_start : word_starts.url_word_starts_) |
1090 i != word_starts.url_word_starts_.end(); ++i) | 1026 map_entry->add_url_word_starts(url_word_start); |
1091 map_entry->add_url_word_starts(*i); | 1027 for (auto title_word_start : word_starts.title_word_starts_) |
1092 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); | 1028 map_entry->add_title_word_starts(title_word_start); |
1093 i != word_starts.title_word_starts_.end(); ++i) | |
1094 map_entry->add_title_word_starts(*i); | |
1095 } | 1029 } |
1096 } | 1030 } |
1097 | 1031 |
1098 bool URLIndexPrivateData::RestorePrivateData( | 1032 bool URLIndexPrivateData::RestorePrivateData( |
1099 const InMemoryURLIndexCacheItem& cache) { | 1033 const InMemoryURLIndexCacheItem& cache) { |
1100 last_time_rebuilt_from_history_ = | 1034 last_time_rebuilt_from_history_ = |
1101 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); | 1035 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); |
1102 const base::TimeDelta rebuilt_ago = | 1036 const base::TimeDelta rebuilt_ago = |
1103 base::Time::Now() - last_time_rebuilt_from_history_; | 1037 base::Time::Now() - last_time_rebuilt_from_history_; |
1104 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || | 1038 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || |
(...skipping 21 matching lines...) | |
1126 | 1060 |
1127 bool URLIndexPrivateData::RestoreWordList( | 1061 bool URLIndexPrivateData::RestoreWordList( |
1128 const InMemoryURLIndexCacheItem& cache) { | 1062 const InMemoryURLIndexCacheItem& cache) { |
1129 if (!cache.has_word_list()) | 1063 if (!cache.has_word_list()) |
1130 return false; | 1064 return false; |
1131 const WordListItem& list_item(cache.word_list()); | 1065 const WordListItem& list_item(cache.word_list()); |
1132 uint32_t expected_item_count = list_item.word_count(); | 1066 uint32_t expected_item_count = list_item.word_count(); |
1133 uint32_t actual_item_count = list_item.word_size(); | 1067 uint32_t actual_item_count = list_item.word_size(); |
1134 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1068 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1135 return false; | 1069 return false; |
1136 const RepeatedPtrField<std::string>& words(list_item.word()); | 1070 const RepeatedPtrField<std::string>& words = list_item.word(); |
1137 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); | 1071 word_list_.reserve(words.size()); |
1138 iter != words.end(); ++iter) | 1072 std::transform( |
1139 word_list_.push_back(base::UTF8ToUTF16(*iter)); | 1073 words.begin(), words.end(), std::back_inserter(word_list_), |
1074 [](const std::string& word) { return base::UTF8ToUTF16(word); }); | |
1140 return true; | 1075 return true; |
1141 } | 1076 } |
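RestoreWordList() now reserves capacity and fills word_list_ with std::transform plus std::back_inserter rather than an explicit loop. A generic sketch of that pattern, with a placeholder char-widening conversion standing in for base::UTF8ToUTF16:

```cpp
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

std::vector<std::u16string> RestoreWords(
    const std::vector<std::string>& cached_words) {
  std::vector<std::u16string> words;
  words.reserve(cached_words.size());
  std::transform(cached_words.begin(), cached_words.end(),
                 std::back_inserter(words), [](const std::string& word) {
                   // Placeholder conversion; real code decodes UTF-8.
                   return std::u16string(word.begin(), word.end());
                 });
  return words;
}
```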
1142 | 1077 |
1143 bool URLIndexPrivateData::RestoreWordMap( | 1078 bool URLIndexPrivateData::RestoreWordMap( |
1144 const InMemoryURLIndexCacheItem& cache) { | 1079 const InMemoryURLIndexCacheItem& cache) { |
1145 if (!cache.has_word_map()) | 1080 if (!cache.has_word_map()) |
1146 return false; | 1081 return false; |
1147 const WordMapItem& list_item(cache.word_map()); | 1082 const WordMapItem& list_item = cache.word_map(); |
1148 uint32_t expected_item_count = list_item.item_count(); | 1083 uint32_t expected_item_count = list_item.item_count(); |
1149 uint32_t actual_item_count = list_item.word_map_entry_size(); | 1084 uint32_t actual_item_count = list_item.word_map_entry_size(); |
1150 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1085 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1151 return false; | 1086 return false; |
1152 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); | 1087 for (const auto& entry : list_item.word_map_entry()) |
1153 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); | 1088 word_map_[base::UTF8ToUTF16(entry.word())] = entry.word_id(); |
1154 iter != entries.end(); ++iter) | 1089 |
1155 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id(); | |
1156 return true; | 1090 return true; |
1157 } | 1091 } |
1158 | 1092 |
1159 bool URLIndexPrivateData::RestoreCharWordMap( | 1093 bool URLIndexPrivateData::RestoreCharWordMap( |
1160 const InMemoryURLIndexCacheItem& cache) { | 1094 const InMemoryURLIndexCacheItem& cache) { |
1161 if (!cache.has_char_word_map()) | 1095 if (!cache.has_char_word_map()) |
1162 return false; | 1096 return false; |
1163 const CharWordMapItem& list_item(cache.char_word_map()); | 1097 const CharWordMapItem& list_item(cache.char_word_map()); |
1164 uint32_t expected_item_count = list_item.item_count(); | 1098 uint32_t expected_item_count = list_item.item_count(); |
1165 uint32_t actual_item_count = list_item.char_word_map_entry_size(); | 1099 uint32_t actual_item_count = list_item.char_word_map_entry_size(); |
1166 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1100 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1167 return false; | 1101 return false; |
1168 const RepeatedPtrField<CharWordMapEntry>& | 1102 |
1169 entries(list_item.char_word_map_entry()); | 1103 for (const auto& entry : list_item.char_word_map_entry()) { |
1170 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = | 1104 expected_item_count = entry.item_count(); |
1171 entries.begin(); iter != entries.end(); ++iter) { | 1105 actual_item_count = entry.word_id_size(); |
1172 expected_item_count = iter->item_count(); | |
1173 actual_item_count = iter->word_id_size(); | |
1174 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1106 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1175 return false; | 1107 return false; |
1176 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); | 1108 base::char16 uni_char = static_cast<base::char16>(entry.char_16()); |
1177 WordIDSet word_id_set; | 1109 const RepeatedField<int32_t>& word_ids = entry.word_id(); |
1178 const RepeatedField<int32_t>& word_ids(iter->word_id()); | 1110 char_word_map_[uni_char] = {word_ids.begin(), word_ids.end()}; |
1179 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin(); | |
1180 jiter != word_ids.end(); ++jiter) | |
1181 word_id_set.insert(*jiter); | |
1182 char_word_map_[uni_char] = word_id_set; | |
1183 } | 1111 } |
1184 return true; | 1112 return true; |
1185 } | 1113 } |
1186 | 1114 |
1187 bool URLIndexPrivateData::RestoreWordIDHistoryMap( | 1115 bool URLIndexPrivateData::RestoreWordIDHistoryMap( |
1188 const InMemoryURLIndexCacheItem& cache) { | 1116 const InMemoryURLIndexCacheItem& cache) { |
1189 if (!cache.has_word_id_history_map()) | 1117 if (!cache.has_word_id_history_map()) |
1190 return false; | 1118 return false; |
1191 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); | 1119 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); |
1192 uint32_t expected_item_count = list_item.item_count(); | 1120 uint32_t expected_item_count = list_item.item_count(); |
1193 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); | 1121 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); |
1194 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1122 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1195 return false; | 1123 return false; |
1196 const RepeatedPtrField<WordIDHistoryMapEntry>& | 1124 for (const auto& entry : list_item.word_id_history_map_entry()) { |
1197 entries(list_item.word_id_history_map_entry()); | 1125 expected_item_count = entry.item_count(); |
1198 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = | 1126 actual_item_count = entry.history_id_size(); |
1199 entries.begin(); iter != entries.end(); ++iter) { | |
1200 expected_item_count = iter->item_count(); | |
1201 actual_item_count = iter->history_id_size(); | |
1202 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1127 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1203 return false; | 1128 return false; |
1204 WordID word_id = iter->word_id(); | 1129 WordID word_id = entry.word_id(); |
1205 HistoryIDSet history_id_set; | 1130 const RepeatedField<int64_t>& history_ids = entry.history_id(); |
1206 const RepeatedField<int64_t>& history_ids(iter->history_id()); | 1131 word_id_history_map_[word_id] = {history_ids.begin(), history_ids.end()}; |
1207 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin(); | 1132 for (HistoryID history_id : history_ids) |
1208 jiter != history_ids.end(); ++jiter) { | 1133 history_id_word_map_[history_id].insert(word_id); |
1209 history_id_set.insert(*jiter); | |
1210 AddToHistoryIDWordMap(*jiter, word_id); | |
1211 } | |
1212 word_id_history_map_[word_id] = history_id_set; | |
1213 } | 1134 } |
1214 return true; | 1135 return true; |
1215 } | 1136 } |
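This hunk also folds the old AddToHistoryIDWordMap() helper call into a direct insert on history_id_word_map_, so the forward and reverse maps are populated in the same loop. A rough, self-contained sketch of that two-map update; the typedefs below are stand-ins only, since the real WordID, HistoryID, and map types are declared elsewhere in the omnibox headers:

#include <cstdint>
#include <map>
#include <set>
#include <vector>

// Stand-in typedefs; the real ones live in the omnibox headers.
using WordID = uint32_t;
using HistoryID = int64_t;
using HistoryIDSet = std::set<HistoryID>;
using WordIDSet = std::set<WordID>;

int main() {
  std::map<WordID, HistoryIDSet> word_id_history_map;
  std::map<HistoryID, WordIDSet> history_id_word_map;

  WordID word_id = 7;
  std::vector<int64_t> history_ids = {10, 11, 12};  // entry.history_id()

  // Forward map: one range construction per cache entry.
  word_id_history_map[word_id] = {history_ids.begin(), history_ids.end()};

  // Reverse map: inline insertion replaces the former helper call,
  // one insert per history ID.
  for (HistoryID history_id : history_ids)
    history_id_word_map[history_id].insert(word_id);

  return history_id_word_map.size() == 3 ? 0 : 1;
}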
1216 | 1137 |
1217 bool URLIndexPrivateData::RestoreHistoryInfoMap( | 1138 bool URLIndexPrivateData::RestoreHistoryInfoMap( |
1218 const InMemoryURLIndexCacheItem& cache) { | 1139 const InMemoryURLIndexCacheItem& cache) { |
1219 if (!cache.has_history_info_map()) | 1140 if (!cache.has_history_info_map()) |
1220 return false; | 1141 return false; |
1221 const HistoryInfoMapItem& list_item(cache.history_info_map()); | 1142 const HistoryInfoMapItem& list_item(cache.history_info_map()); |
1222 uint32_t expected_item_count = list_item.item_count(); | 1143 uint32_t expected_item_count = list_item.item_count(); |
1223 uint32_t actual_item_count = list_item.history_info_map_entry_size(); | 1144 uint32_t actual_item_count = list_item.history_info_map_entry_size(); |
1224 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1145 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1225 return false; | 1146 return false; |
1226 const RepeatedPtrField<HistoryInfoMapEntry>& | 1147 |
1227 entries(list_item.history_info_map_entry()); | 1148 for (const auto& entry : list_item.history_info_map_entry()) { |
1228 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = | 1149 HistoryID history_id = entry.history_id(); |
1229 entries.begin(); iter != entries.end(); ++iter) { | 1150 history::URLRow url_row(GURL(entry.url()), history_id); |
1230 HistoryID history_id = iter->history_id(); | 1151 url_row.set_visit_count(entry.visit_count()); |
1231 GURL url(iter->url()); | 1152 url_row.set_typed_count(entry.typed_count()); |
1232 history::URLRow url_row(url, history_id); | 1153 url_row.set_last_visit(base::Time::FromInternalValue(entry.last_visit())); |
1233 url_row.set_visit_count(iter->visit_count()); | 1154 if (entry.has_title()) |
1234 url_row.set_typed_count(iter->typed_count()); | 1155 url_row.set_title(base::UTF8ToUTF16(entry.title())); |
1235 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1156 history_info_map_[history_id].url_row = std::move(url_row); |
1236 if (iter->has_title()) { | |
1237 base::string16 title(base::UTF8ToUTF16(iter->title())); | |
1238 url_row.set_title(title); | |
1239 } | |
1240 history_info_map_[history_id].url_row = url_row; | |
1241 | 1157 |
1242 // Restore visits list. | 1158 // Restore visits list. |
1243 VisitInfoVector visits; | 1159 VisitInfoVector visits; |
1244 visits.reserve(iter->visits_size()); | 1160 visits.reserve(entry.visits_size()); |
1245 for (int i = 0; i < iter->visits_size(); ++i) { | 1161 for (const auto& entry_visit : entry.visits()) { |
1246 visits.push_back(std::make_pair( | 1162 visits.emplace_back( |
1247 base::Time::FromInternalValue(iter->visits(i).visit_time()), | 1163 base::Time::FromInternalValue(entry_visit.visit_time()), |
1248 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); | 1164 ui::PageTransitionFromInt(entry_visit.transition_type())); |
1249 } | 1165 } |
1250 history_info_map_[history_id].visits = visits; | 1166 history_info_map_[history_id].visits = std::move(visits); |
1251 } | 1167 } |
1252 return true; | 1168 return true; |
1253 } | 1169 } |
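The visit-list restoration now uses emplace_back() rather than push_back(std::make_pair(...)), constructing each (visit time, transition) pair in place. A self-contained sketch of that change, with plain integers standing in for base::Time and ui::PageTransition so it compiles on its own:

#include <cstdint>
#include <utility>
#include <vector>

int main() {
  // Stand-in for VisitInfoVector: (visit time, transition type) pairs.
  std::vector<std::pair<int64_t, int>> visits;
  visits.reserve(2);

  // emplace_back forwards its arguments to the pair constructor directly,
  // avoiding the intermediate std::make_pair temporary.
  visits.emplace_back(1234567890LL, 0);
  visits.emplace_back(1234567999LL, 1);

  return visits.size() == 2 ? 0 : 1;
}

The subsequent std::move(visits) into history_info_map_ then hands the vector over without a second copy.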
1254 | 1170 |
1255 bool URLIndexPrivateData::RestoreWordStartsMap( | 1171 bool URLIndexPrivateData::RestoreWordStartsMap( |
1256 const InMemoryURLIndexCacheItem& cache) { | 1172 const InMemoryURLIndexCacheItem& cache) { |
1257 // Note that this function must be called after RestoreHistoryInfoMap() has | 1173 // Note that this function must be called after RestoreHistoryInfoMap() has |
1258 // been run, as the word starts may have to be recalculated from the URLs and | 1174 // been run, as the word starts may have to be recalculated from the URLs and |
1259 // page titles. | 1175 // page titles. |
1260 if (cache.has_word_starts_map()) { | 1176 if (cache.has_word_starts_map()) { |
1261 const WordStartsMapItem& list_item(cache.word_starts_map()); | 1177 const WordStartsMapItem& list_item(cache.word_starts_map()); |
1262 uint32_t expected_item_count = list_item.item_count(); | 1178 uint32_t expected_item_count = list_item.item_count(); |
1263 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); | 1179 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); |
1264 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1180 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1265 return false; | 1181 return false; |
1266 const RepeatedPtrField<WordStartsMapEntry>& | 1182 for (const auto& entry : list_item.word_starts_map_entry()) { |
1267 entries(list_item.word_starts_map_entry()); | 1183 HistoryID history_id = entry.history_id(); |
1268 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter = | |
1269 entries.begin(); iter != entries.end(); ++iter) { | |
1270 HistoryID history_id = iter->history_id(); | |
1271 RowWordStarts word_starts; | 1184 RowWordStarts word_starts; |
1272 // Restore the URL word starts. | 1185 // Restore the URL word starts. |
1273 const RepeatedField<int32_t>& url_starts(iter->url_word_starts()); | 1186 const RepeatedField<int32_t>& url_starts = entry.url_word_starts(); |
1274 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin(); | 1187 word_starts.url_word_starts_ = {url_starts.begin(), url_starts.end()}; |
1275 jiter != url_starts.end(); ++jiter) | 1188 |
1276 word_starts.url_word_starts_.push_back(*jiter); | |
1277 // Restore the page title word starts. | 1189 // Restore the page title word starts. |
1278 const RepeatedField<int32_t>& title_starts(iter->title_word_starts()); | 1190 const RepeatedField<int32_t>& title_starts = entry.title_word_starts(); |
1279 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin(); | 1191 word_starts.title_word_starts_ = {title_starts.begin(), |
1280 jiter != title_starts.end(); ++jiter) | 1192 title_starts.end()}; |
1281 word_starts.title_word_starts_.push_back(*jiter); | 1193 |
1282 word_starts_map_[history_id] = word_starts; | 1194 word_starts_map_[history_id] = std::move(word_starts); |
1283 } | 1195 } |
1284 } else { | 1196 } else { |
1285 // Since the cache did not contain any word starts, we must rebuild them from | 1197 // Since the cache did not contain any word starts, we must rebuild them from |
1286 // the URL and page titles. | 1198 // the URL and page titles. |
1287 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1199 for (const auto& entry : history_info_map_) { |
1288 iter != history_info_map_.end(); ++iter) { | |
1289 RowWordStarts word_starts; | 1200 RowWordStarts word_starts; |
1290 const history::URLRow& row(iter->second.url_row); | 1201 const history::URLRow& row = entry.second.url_row; |
1291 const base::string16& url = | 1202 const base::string16& url = |
1292 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); | 1203 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); |
1293 String16VectorFromString16(url, false, &word_starts.url_word_starts_); | 1204 String16VectorFromString16(url, false, &word_starts.url_word_starts_); |
1294 const base::string16& title = | 1205 const base::string16& title = |
1295 bookmarks::CleanUpTitleForMatching(row.title()); | 1206 bookmarks::CleanUpTitleForMatching(row.title()); |
1296 String16VectorFromString16(title, false, &word_starts.title_word_starts_); | 1207 String16VectorFromString16(title, false, &word_starts.title_word_starts_); |
1297 word_starts_map_[iter->first] = word_starts; | 1208 word_starts_map_[entry.first] = std::move(word_starts); |
1298 } | 1209 } |
1299 } | 1210 } |
1300 return true; | 1211 return true; |
1301 } | 1212 } |
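When the cache has no word-starts section, the fallback loop recomputes the offsets from the cleaned-up URL and title via String16VectorFromString16(). The hypothetical ComputeWordStarts() below is a rough illustration of what a word-starts vector records; the real tokenizer works on base::string16 with ICU word-break rules, not this ASCII heuristic:

#include <cctype>
#include <cstddef>
#include <string>
#include <vector>

// Illustration only: collect the offset of each alphanumeric run, which is
// roughly what the url/title word starts record.
std::vector<size_t> ComputeWordStarts(const std::string& text) {
  std::vector<size_t> starts;
  bool in_word = false;
  for (size_t i = 0; i < text.size(); ++i) {
    bool is_word_char =
        std::isalnum(static_cast<unsigned char>(text[i])) != 0;
    if (is_word_char && !in_word)
      starts.push_back(i);
    in_word = is_word_char;
  }
  return starts;
}

int main() {
  // "www.example.com/path" yields starts at offsets 0, 4, 12 and 16.
  return ComputeWordStarts("www.example.com/path").size() == 4 ? 0 : 1;
}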
1302 | 1213 |
1303 // static | 1214 // static |
1304 bool URLIndexPrivateData::URLSchemeIsWhitelisted( | 1215 bool URLIndexPrivateData::URLSchemeIsWhitelisted( |
1305 const GURL& gurl, | 1216 const GURL& gurl, |
1306 const std::set<std::string>& whitelist) { | 1217 const std::set<std::string>& whitelist) { |
1307 return whitelist.find(gurl.scheme()) != whitelist.end(); | 1218 return whitelist.find(gurl.scheme()) != whitelist.end(); |
(...skipping 62 matching lines...) | |
1370 // First cut: typed count, visit count, recency. | 1281 // First cut: typed count, visit count, recency. |
1371 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1282 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
1372 // recently visited (within the last 12/24 hours) as highly important. Get | 1283 // recently visited (within the last 12/24 hours) as highly important. Get |
1373 // input from mpearson. | 1284 // input from mpearson. |
1374 if (r1.typed_count() != r2.typed_count()) | 1285 if (r1.typed_count() != r2.typed_count()) |
1375 return (r1.typed_count() > r2.typed_count()); | 1286 return (r1.typed_count() > r2.typed_count()); |
1376 if (r1.visit_count() != r2.visit_count()) | 1287 if (r1.visit_count() != r2.visit_count()) |
1377 return (r1.visit_count() > r2.visit_count()); | 1288 return (r1.visit_count() > r2.visit_count()); |
1378 return (r1.last_visit() > r2.last_visit()); | 1289 return (r1.last_visit() > r2.last_visit()); |
1379 } | 1290 } |
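The ordering above ranks rows by typed count, then visit count, then last-visit time, all descending. A compilable sketch of the same three-key ordering on a hypothetical stripped-down row type, usable as a strict-weak-ordering predicate for std::sort:

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for history::URLRow holding only the three keys
// the comparator above looks at.
struct Row {
  int typed_count;
  int visit_count;
  int64_t last_visit;
};

// Same ordering: typed count, then visit count, then recency, descending.
bool RankedHigher(const Row& r1, const Row& r2) {
  if (r1.typed_count != r2.typed_count)
    return r1.typed_count > r2.typed_count;
  if (r1.visit_count != r2.visit_count)
    return r1.visit_count > r2.visit_count;
  return r1.last_visit > r2.last_visit;
}

int main() {
  std::vector<Row> rows = {{1, 9, 50}, {3, 2, 10}, {3, 2, 20}};
  std::sort(rows.begin(), rows.end(), RankedHigher);
  // Highest typed count wins; ties fall through to visit count and recency.
  return rows.front().typed_count == 3 && rows.front().last_visit == 20 ? 0 : 1;
}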