Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
| 6 | 6 |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <vector> | 10 #include <vector> |
| (...skipping 91 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 102 } | 102 } |
| 103 | 103 |
| 104 } // namespace | 104 } // namespace |
| 105 | 105 |
| 106 // static | 106 // static |
| 107 bool ScoredHistoryMatch::also_do_hup_like_scoring_; | 107 bool ScoredHistoryMatch::also_do_hup_like_scoring_; |
| 108 float ScoredHistoryMatch::bookmark_value_; | 108 float ScoredHistoryMatch::bookmark_value_; |
| 109 float ScoredHistoryMatch::typed_value_; | 109 float ScoredHistoryMatch::typed_value_; |
| 110 bool ScoredHistoryMatch::fix_few_visits_bug_; | 110 bool ScoredHistoryMatch::fix_few_visits_bug_; |
| 111 bool ScoredHistoryMatch::frequency_uses_sum_; | 111 bool ScoredHistoryMatch::frequency_uses_sum_; |
| 112 OmniboxFieldTrial::NumMatchesMultipliers* | |
| 113 ScoredHistoryMatch::num_matches_to_frequency_multiplier_ = nullptr; | |
| 112 size_t ScoredHistoryMatch::max_visits_to_score_; | 114 size_t ScoredHistoryMatch::max_visits_to_score_; |
| 113 bool ScoredHistoryMatch::allow_tld_matches_; | 115 bool ScoredHistoryMatch::allow_tld_matches_; |
| 114 bool ScoredHistoryMatch::allow_scheme_matches_; | 116 bool ScoredHistoryMatch::allow_scheme_matches_; |
| 115 size_t ScoredHistoryMatch::num_title_words_to_allow_; | 117 size_t ScoredHistoryMatch::num_title_words_to_allow_; |
| 116 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_; | 118 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_; |
| 117 | 119 |
| 118 // Default topicality threshold. See GetTopicalityScore() for details. | 120 // Default topicality threshold. See GetTopicalityScore() for details. |
| 119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; | 121 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; |
| 120 | 122 |
| 121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details | 123 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details |
| 122 // on these numbers. | 124 // on these numbers. |
| 123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = | 125 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = |
| 124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; | 126 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; |
| 125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* | 127 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* |
| 126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; | 128 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; |
| 127 | 129 |
| 128 ScoredHistoryMatch::ScoredHistoryMatch() | 130 ScoredHistoryMatch::ScoredHistoryMatch() |
| 129 : ScoredHistoryMatch(history::URLRow(), | 131 : ScoredHistoryMatch(history::URLRow(), |
| 130 VisitInfoVector(), | 132 VisitInfoVector(), |
| 131 base::string16(), | 133 base::string16(), |
| 132 String16Vector(), | 134 String16Vector(), |
| 133 WordStarts(), | 135 WordStarts(), |
| 134 RowWordStarts(), | 136 RowWordStarts(), |
| 135 false, | 137 false, |
| 136 base::Time::Max()) { | 138 1, |
| 137 } | 139 base::Time::Max()) {} |
| 138 | 140 |
| 139 ScoredHistoryMatch::ScoredHistoryMatch( | 141 ScoredHistoryMatch::ScoredHistoryMatch( |
| 140 const history::URLRow& row, | 142 const history::URLRow& row, |
| 141 const VisitInfoVector& visits, | 143 const VisitInfoVector& visits, |
| 142 const base::string16& lower_string, | 144 const base::string16& lower_string, |
| 143 const String16Vector& terms_vector, | 145 const String16Vector& terms_vector, |
| 144 const WordStarts& terms_to_word_starts_offsets, | 146 const WordStarts& terms_to_word_starts_offsets, |
| 145 const RowWordStarts& word_starts, | 147 const RowWordStarts& word_starts, |
| 146 bool is_url_bookmarked, | 148 bool is_url_bookmarked, |
| 149 size_t num_matching_pages, | |
| 147 base::Time now) | 150 base::Time now) |
| 148 : HistoryMatch(row, 0, false, false), raw_score(0) { | 151 : HistoryMatch(row, 0, false, false), raw_score(0) { |
| 149 // NOTE: Call Init() before doing any validity checking to ensure that the | 152 // NOTE: Call Init() before doing any validity checking to ensure that the |
| 150 // class is always initialized after an instance has been constructed. In | 153 // class is always initialized after an instance has been constructed. In |
| 151 // particular, this ensures that the class is initialized after an instance | 154 // particular, this ensures that the class is initialized after an instance |
| 152 // has been constructed via the no-args constructor. | 155 // has been constructed via the no-args constructor. |
| 153 ScoredHistoryMatch::Init(); | 156 ScoredHistoryMatch::Init(); |
| 154 | 157 |
| 155 // Figure out where each search term appears in the URL and/or page title | 158 // Figure out where each search term appears in the URL and/or page title |
| 156 // so that we can score as well as provide autocomplete highlighting. | 159 // so that we can score as well as provide autocomplete highlighting. |
| (...skipping 96 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 253 // the prefix that makes it inlineable may be empty. | 256 // the prefix that makes it inlineable may be empty. |
| 254 likely_can_inline = true; | 257 likely_can_inline = true; |
| 255 innermost_match = (best_inlineable_prefix->num_components == | 258 innermost_match = (best_inlineable_prefix->num_components == |
| 256 best_prefix->num_components); | 259 best_prefix->num_components); |
| 257 } | 260 } |
| 258 } | 261 } |
| 259 } | 262 } |
| 260 | 263 |
| 261 const float topicality_score = GetTopicalityScore( | 264 const float topicality_score = GetTopicalityScore( |
| 262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); | 265 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
| 263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); | 266 const float frequency_score = |
| 267 GetFrequency(now, is_url_bookmarked, visits, num_matching_pages); | |
| 264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( | 268 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( |
| 265 topicality_score, frequency_score, *hqp_relevance_buckets_)); | 269 topicality_score, frequency_score, *hqp_relevance_buckets_)); |
| 266 | 270 |
| 267 if (also_do_hup_like_scoring_ && likely_can_inline) { | 271 if (also_do_hup_like_scoring_ && likely_can_inline) { |
| 268 // HistoryURL-provider-like scoring gives any match that is | 272 // HistoryURL-provider-like scoring gives any match that is |
| 269 // capable of being inlined a certain minimum score. Some of these | 273 // capable of being inlined a certain minimum score. Some of these |
| 270 // are given a higher score that lets them be shown in inline. | 274 // are given a higher score that lets them be shown in inline. |
| 271 // This test here derives from the test in | 275 // This test here derives from the test in |
| 272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). | 276 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). |
| 273 const bool promote_to_inline = | 277 const bool promote_to_inline = |
| (...skipping 131 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 405 if (initialized) | 409 if (initialized) |
| 406 return; | 410 return; |
| 407 | 411 |
| 408 initialized = true; | 412 initialized = true; |
| 409 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); | 413 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); |
| 410 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); | 414 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
| 411 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); | 415 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); |
| 412 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); | 416 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); |
| 413 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); | 417 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); |
| 414 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); | 418 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); |
| 419 num_matches_to_frequency_multiplier_ = | |
| 420 new OmniboxFieldTrial::NumMatchesMultipliers(); | |
|
> **Peter Kasting** (2016/12/01 07:07:53): This leaks. If you want a leaky object of class t […]
>
> **Mark P** (2016/12/04 01:06:42): Still TODO.
>
> **Mark P** (2016/12/06 21:02:47): I don't understand how this macro should be used i […]
>
> **Peter Kasting** (2016/12/06 21:41:27): There are two ways to do this I can think of: (1) […]
>
> **Mark P** (2016/12/08 00:21:31): Thanks for the explanation. I'll do something lik […]

| |
| 421 OmniboxFieldTrial::HQPGetNumMatchesMultipliers( | |
| 422 num_matches_to_frequency_multiplier_); | |
| 415 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); | 423 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
| 416 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); | 424 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
| 417 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); | 425 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); |
| 418 | 426 |
| 419 InitRawTermScoreToTopicalityScoreArray(); | 427 InitRawTermScoreToTopicalityScoreArray(); |
| 420 InitDaysAgoToRecencyScoreArray(); | 428 InitDaysAgoToRecencyScoreArray(); |
| 421 InitHQPExperimentalParams(); | 429 InitHQPExperimentalParams(); |
| 422 } | 430 } |
| 423 | 431 |
| 424 float ScoredHistoryMatch::GetTopicalityScore( | 432 float ScoredHistoryMatch::GetTopicalityScore( |
| (...skipping 149 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 574 // Lookup the score in days_ago_to_recency_score, treating | 582 // Lookup the score in days_ago_to_recency_score, treating |
| 575 // everything older than what we've precomputed as the oldest thing | 583 // everything older than what we've precomputed as the oldest thing |
| 576 // we've precomputed. The std::max is to protect against corruption | 584 // we've precomputed. The std::max is to protect against corruption |
| 577 // in the database (in case last_visit_days_ago is negative). | 585 // in the database (in case last_visit_days_ago is negative). |
| 578 return days_ago_to_recency_score[std::max( | 586 return days_ago_to_recency_score[std::max( |
| 579 std::min(last_visit_days_ago, kDaysToPrecomputeRecencyScoresFor - 1), 0)]; | 587 std::min(last_visit_days_ago, kDaysToPrecomputeRecencyScoresFor - 1), 0)]; |
| 580 } | 588 } |
| 581 | 589 |
| 582 float ScoredHistoryMatch::GetFrequency(const base::Time& now, | 590 float ScoredHistoryMatch::GetFrequency(const base::Time& now, |
| 583 const bool bookmarked, | 591 const bool bookmarked, |
| 584 const VisitInfoVector& visits) const { | 592 const VisitInfoVector& visits, |
| 593 const size_t num_matching_pages) const { | |
| 585 // Compute the weighted sum of |value_of_transition| over the last at most | 594 // Compute the weighted sum of |value_of_transition| over the last at most |
| 586 // |max_visits_to_score_| visits, where each visit is weighted using | 595 // |max_visits_to_score_| visits, where each visit is weighted using |
| 587 // GetRecencyScore() based on how many days ago it happened. | 596 // GetRecencyScore() based on how many days ago it happened. |
| 588 float summed_visit_points = 0; | 597 float summed_visit_points = 0; |
| 589 auto visits_end = | 598 auto visits_end = |
| 590 visits.begin() + std::min(visits.size(), max_visits_to_score_); | 599 visits.begin() + std::min(visits.size(), max_visits_to_score_); |
| 591 // Visits should be in newest to oldest order. | 600 // Visits should be in newest to oldest order. |
| 592 DCHECK(std::adjacent_find( | 601 DCHECK(std::adjacent_find( |
| 593 visits.begin(), visits_end, | 602 visits.begin(), visits_end, |
| 594 [](const history::VisitInfo& a, const history::VisitInfo& b) { | 603 [](const history::VisitInfo& a, const history::VisitInfo& b) { |
| 595 return a.first < b.first; | 604 return a.first < b.first; |
| 596 }) == visits_end); | 605 }) == visits_end); |
| 597 for (auto i = visits.begin(); i != visits_end; ++i) { | 606 for (auto i = visits.begin(); i != visits_end; ++i) { |
| 598 const bool is_page_transition_typed = | 607 const bool is_page_transition_typed = |
| 599 ui::PageTransitionCoreTypeIs(i->second, ui::PAGE_TRANSITION_TYPED); | 608 ui::PageTransitionCoreTypeIs(i->second, ui::PAGE_TRANSITION_TYPED); |
| 600 float value_of_transition = is_page_transition_typed ? typed_value_ : 1; | 609 float value_of_transition = is_page_transition_typed ? typed_value_ : 1; |
| 601 if (bookmarked) | 610 if (bookmarked) |
| 602 value_of_transition = std::max(value_of_transition, bookmark_value_); | 611 value_of_transition = std::max(value_of_transition, bookmark_value_); |
| 603 const float bucket_weight = GetRecencyScore((now - i->first).InDays()); | 612 const float bucket_weight = GetRecencyScore((now - i->first).InDays()); |
| 604 summed_visit_points += (value_of_transition * bucket_weight); | 613 summed_visit_points += (value_of_transition * bucket_weight); |
| 605 } | 614 } |
| 606 if (frequency_uses_sum_) | 615 float frequency; |
| 607 return summed_visit_points; | 616 if (frequency_uses_sum_) { |
| 608 | 617 frequency = summed_visit_points; |
| 609 // Compute the average weighted value_of_transition and return it. | 618 } else { |
| 610 // Use |max_visits_to_score_| as the denominator for the average regardless of | 619 // Compute the average weighted value_of_transition and return it. |
| 611 // how many visits there were in order to penalize a match that has | 620 // Use |max_visits_to_score_| as the denominator for the average regardless |
| 612 // fewer visits than kMaxVisitsToScore. | 621 // of how many visits there were in order to penalize a match that has |
| 613 if (fix_few_visits_bug_) | 622 // fewer visits than kMaxVisitsToScore. |
| 614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; | 623 if (fix_few_visits_bug_) { |
| 615 return visits.size() * summed_visit_points / | 624 frequency = |
| 616 ScoredHistoryMatch::max_visits_to_score_; | 625 summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; |
| 626 } else { | |
| 627 frequency = visits.size() * summed_visit_points / | |
| 628 ScoredHistoryMatch::max_visits_to_score_; | |
| 629 } | |
| 630 } | |
| 631 // Boost the score if applicable. | |
| 632 if (num_matches_to_frequency_multiplier_->find(num_matching_pages) != | |
| 633 num_matches_to_frequency_multiplier_->end()) | |
| 634 frequency *= (*num_matches_to_frequency_multiplier_)[num_matching_pages]; | |
| 635 return frequency; | |
|
> **Peter Kasting** (2016/12/01 07:07:53): I don't love putting this here. Part of this is c […]
>
> **Mark P** (2016/12/04 01:06:42): I agree with your reasoning below. After some tho […]

| |
| 617 } | 636 } |
| 618 | 637 |
| 619 // static | 638 // static |
| 620 float ScoredHistoryMatch::GetFinalRelevancyScore( | 639 float ScoredHistoryMatch::GetFinalRelevancyScore( |
| 621 float topicality_score, | 640 float topicality_score, |
| 622 float frequency_score, | 641 float frequency_score, |
| 623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { | 642 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
| 624 DCHECK(hqp_relevance_buckets.size() > 0); | 643 DCHECK(hqp_relevance_buckets.size() > 0); |
| 625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); | 644 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); |
| 626 | 645 |
| (...skipping 94 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 721 base::StringToDouble(it->first, &bucket.first); | 740 base::StringToDouble(it->first, &bucket.first); |
| 722 DCHECK(is_valid_intermediate_score); | 741 DCHECK(is_valid_intermediate_score); |
| 723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); | 742 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); |
| 724 DCHECK(is_valid_hqp_score); | 743 DCHECK(is_valid_hqp_score); |
| 725 hqp_buckets->push_back(bucket); | 744 hqp_buckets->push_back(bucket); |
| 726 } | 745 } |
| 727 return true; | 746 return true; |
| 728 } | 747 } |
| 729 return false; | 748 return false; |
| 730 } | 749 } |
| OLD | NEW |