OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 | 8 |
9 #include <algorithm> | 9 #include <algorithm> |
10 #include <vector> | 10 #include <vector> |
(...skipping 91 matching lines...) | |
102 } | 102 } |
103 | 103 |
104 } // namespace | 104 } // namespace |
105 | 105 |
106 // static | 106 // static |
107 bool ScoredHistoryMatch::also_do_hup_like_scoring_; | 107 bool ScoredHistoryMatch::also_do_hup_like_scoring_; |
108 float ScoredHistoryMatch::bookmark_value_; | 108 float ScoredHistoryMatch::bookmark_value_; |
109 float ScoredHistoryMatch::typed_value_; | 109 float ScoredHistoryMatch::typed_value_; |
110 bool ScoredHistoryMatch::fix_few_visits_bug_; | 110 bool ScoredHistoryMatch::fix_few_visits_bug_; |
111 bool ScoredHistoryMatch::frequency_uses_sum_; | 111 bool ScoredHistoryMatch::frequency_uses_sum_; |
112 OmniboxFieldTrial::NumMatchesScores* | |
113 ScoredHistoryMatch::num_matches_to_document_specificity_score_ = nullptr; | |
112 size_t ScoredHistoryMatch::max_visits_to_score_; | 114 size_t ScoredHistoryMatch::max_visits_to_score_; |
113 bool ScoredHistoryMatch::allow_tld_matches_; | 115 bool ScoredHistoryMatch::allow_tld_matches_; |
114 bool ScoredHistoryMatch::allow_scheme_matches_; | 116 bool ScoredHistoryMatch::allow_scheme_matches_; |
115 size_t ScoredHistoryMatch::num_title_words_to_allow_; | 117 size_t ScoredHistoryMatch::num_title_words_to_allow_; |
116 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_; | 118 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_; |
117 | 119 |
118 // Default topicality threshold. See GetTopicalityScore() for details. | 120 // Default topicality threshold. See GetTopicalityScore() for details. |
119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; | 121 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; |
120 | 122 |
121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details | 123 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details |
122 // on these numbers. | 124 // on these numbers. |
123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = | 125 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = |
124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; | 126 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; |
125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* | 127 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* |
126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; | 128 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; |
127 | 129 |
128 ScoredHistoryMatch::ScoredHistoryMatch() | 130 ScoredHistoryMatch::ScoredHistoryMatch() |
129 : ScoredHistoryMatch(history::URLRow(), | 131 : ScoredHistoryMatch(history::URLRow(), |
130 VisitInfoVector(), | 132 VisitInfoVector(), |
131 base::string16(), | 133 base::string16(), |
132 String16Vector(), | 134 String16Vector(), |
133 WordStarts(), | 135 WordStarts(), |
134 RowWordStarts(), | 136 RowWordStarts(), |
135 false, | 137 false, |
136 base::Time::Max()) { | 138 1, |
137 } | 139 base::Time::Max()) {} |
138 | 140 |
139 ScoredHistoryMatch::ScoredHistoryMatch( | 141 ScoredHistoryMatch::ScoredHistoryMatch( |
140 const history::URLRow& row, | 142 const history::URLRow& row, |
141 const VisitInfoVector& visits, | 143 const VisitInfoVector& visits, |
142 const base::string16& lower_string, | 144 const base::string16& lower_string, |
143 const String16Vector& terms_vector, | 145 const String16Vector& terms_vector, |
144 const WordStarts& terms_to_word_starts_offsets, | 146 const WordStarts& terms_to_word_starts_offsets, |
145 const RowWordStarts& word_starts, | 147 const RowWordStarts& word_starts, |
146 bool is_url_bookmarked, | 148 bool is_url_bookmarked, |
149 size_t num_matching_pages, | |
147 base::Time now) | 150 base::Time now) |
148 : HistoryMatch(row, 0, false, false), raw_score(0) { | 151 : HistoryMatch(row, 0, false, false), raw_score(0) { |
149 // NOTE: Call Init() before doing any validity checking to ensure that the | 152 // NOTE: Call Init() before doing any validity checking to ensure that the |
150 // class is always initialized after an instance has been constructed. In | 153 // class is always initialized after an instance has been constructed. In |
151 // particular, this ensures that the class is initialized after an instance | 154 // particular, this ensures that the class is initialized after an instance |
152 // has been constructed via the no-args constructor. | 155 // has been constructed via the no-args constructor. |
153 ScoredHistoryMatch::Init(); | 156 ScoredHistoryMatch::Init(); |
154 | 157 |
155 // Figure out where each search term appears in the URL and/or page title | 158 // Figure out where each search term appears in the URL and/or page title |
156 // so that we can score as well as provide autocomplete highlighting. | 159 // so that we can score as well as provide autocomplete highlighting. |
(...skipping 97 matching lines...) | |
254 likely_can_inline = true; | 257 likely_can_inline = true; |
255 innermost_match = (best_inlineable_prefix->num_components == | 258 innermost_match = (best_inlineable_prefix->num_components == |
256 best_prefix->num_components); | 259 best_prefix->num_components); |
257 } | 260 } |
258 } | 261 } |
259 } | 262 } |
260 | 263 |
261 const float topicality_score = GetTopicalityScore( | 264 const float topicality_score = GetTopicalityScore( |
262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); | 265 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); | 266 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); |
264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( | 267 const float specificity_score = |
265 topicality_score, frequency_score, *hqp_relevance_buckets_)); | 268 GetDocumentSpecificityScore(num_matching_pages); |
269 raw_score = base::saturated_cast<int>( | |
270 GetFinalRelevancyScore(topicality_score, frequency_score, | |
271 specificity_score, *hqp_relevance_buckets_)); | |
266 | 272 |
267 if (also_do_hup_like_scoring_ && likely_can_inline) { | 273 if (also_do_hup_like_scoring_ && likely_can_inline) { |
268 // HistoryURL-provider-like scoring gives any match that is | 274 // HistoryURL-provider-like scoring gives any match that is |
269 // capable of being inlined a certain minimum score. Some of these | 275 // capable of being inlined a certain minimum score. Some of these |
270 // are given a higher score that lets them be shown in inline. | 276 // are given a higher score that lets them be shown in inline. |
271 // This test here derives from the test in | 277 // This test here derives from the test in |
272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). | 278 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). |
273 const bool promote_to_inline = | 279 const bool promote_to_inline = |
274 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); | 280 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); |
275 int hup_like_score = | 281 int hup_like_score = |
(...skipping 129 matching lines...) | |
405 if (initialized) | 411 if (initialized) |
406 return; | 412 return; |
407 | 413 |
408 initialized = true; | 414 initialized = true; |
409 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); | 415 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); |
410 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); | 416 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
411 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); | 417 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); |
412 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); | 418 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); |
413 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); | 419 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); |
414 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); | 420 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); |
421 num_matches_to_document_specificity_score_ = | |
422 new OmniboxFieldTrial::NumMatchesScores(); | |
423 (*num_matches_to_document_specificity_score_) = | |
424 OmniboxFieldTrial::HQPNumMatchesScores(); | |
415 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); | 425 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
416 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); | 426 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
417 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); | 427 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); |
418 | 428 |
419 InitRawTermScoreToTopicalityScoreArray(); | 429 InitRawTermScoreToTopicalityScoreArray(); |
420 InitDaysAgoToRecencyScoreArray(); | 430 InitDaysAgoToRecencyScoreArray(); |
421 InitHQPExperimentalParams(); | 431 InitHQPExperimentalParams(); |
422 } | 432 } |
423 | 433 |
424 float ScoredHistoryMatch::GetTopicalityScore( | 434 float ScoredHistoryMatch::GetTopicalityScore( |
(...skipping 184 matching lines...) | |
609 // Compute the average weighted value_of_transition and return it. | 619 // Compute the average weighted value_of_transition and return it. |
610 // Use |max_visits_to_score_| as the denominator for the average regardless of | 620 // Use |max_visits_to_score_| as the denominator for the average regardless of |
611 // how many visits there were in order to penalize a match that has | 621 // how many visits there were in order to penalize a match that has |
612 // fewer visits than kMaxVisitsToScore. | 622 // fewer visits than kMaxVisitsToScore. |
613 if (fix_few_visits_bug_) | 623 if (fix_few_visits_bug_) |
614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; | 624 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; |
615 return visits.size() * summed_visit_points / | 625 return visits.size() * summed_visit_points / |
616 ScoredHistoryMatch::max_visits_to_score_; | 626 ScoredHistoryMatch::max_visits_to_score_; |
617 } | 627 } |
618 | 628 |
629 float ScoredHistoryMatch::GetDocumentSpecificityScore( | |
630 const size_t num_matching_pages) const { | |
631 // The floating point value below doesn't matter. | |
[Review comment] Peter Kasting (2016/12/06 05:19:30): I'm not sure that's true. Since upper_bound() ret[…]
[Review comment] Mark P (2016/12/08 00:21:31): Good point. I never imagined a field trial specif[…]
| |
632 OmniboxFieldTrial::NumMatchesScores::const_iterator it = | |
633 std::upper_bound(num_matches_to_document_specificity_score_->begin(), | |
634 num_matches_to_document_specificity_score_->end(), | |
635 std::pair<size_t, double>{num_matching_pages, 1.0}); | |
636 if (it == num_matches_to_document_specificity_score_->end()) | |
637 return 1.0; | |
638 return it->second; | |
[Review comment] Peter Kasting (2016/12/06 05:19:30): Nit: Could use ?:
[Review comment] Mark P (2016/12/08 00:21:31): Done (after reversing the order, as that read bett[…]
[Review comment] Peter Kasting (2016/12/08 00:51:40): My concern with the order reversal is that it sort[…]
[Review comment] Mark P (2016/12/08 04:37:35): I understand the point, but I find != end to read[…]
| |
639 }; | |
640 | |
619 // static | 641 // static |
620 float ScoredHistoryMatch::GetFinalRelevancyScore( | 642 float ScoredHistoryMatch::GetFinalRelevancyScore( |
621 float topicality_score, | 643 float topicality_score, |
622 float frequency_score, | 644 float frequency_score, |
645 float specificity_score, | |
623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { | 646 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
624 DCHECK(hqp_relevance_buckets.size() > 0); | 647 DCHECK(hqp_relevance_buckets.size() > 0); |
625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); | 648 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); |
626 | 649 |
627 if (topicality_score == 0) | 650 if (topicality_score == 0) |
628 return 0; | 651 return 0; |
629 // Here's how to interpret intermediate_score: Suppose the omnibox | 652 // Here's how to interpret intermediate_score: Suppose the omnibox has one |
630 // has one input term. Suppose we have a URL for which the omnibox | 653 // input term. Suppose the input matches many documents. (This implies |
654 // specificity_score == 1.0.) Suppose we have a URL for which the omnibox | |
631 // input term has a single URL hostname hit at a word boundary. (This | 655 // input term has a single URL hostname hit at a word boundary. (This |
632 // implies topicality_score = 1.0.). Then the intermediate_score for | 656 // implies topicality_score = 1.0.). Then the intermediate_score for |
633 // this URL will depend entirely on the frequency_score with | 657 // this URL will depend entirely on the frequency_score with |
634 // this interpretation: | 658 // this interpretation: |
635 // - a single typed visit more than three months ago, no other visits -> 0.2 | 659 // - a single typed visit more than three months ago, no other visits -> 0.2 |
636 // - a visit every three days, no typed visits -> 0.706 | 660 // - a visit every three days, no typed visits -> 0.706 |
637 // - a visit every day, no typed visits -> 0.916 | 661 // - a visit every day, no typed visits -> 0.916 |
638 // - a single typed visit yesterday, no other visits -> 2.0 | 662 // - a single typed visit yesterday, no other visits -> 2.0 |
639 // - a typed visit once a week -> 11.77 | 663 // - a typed visit once a week -> 11.77 |
640 // - a typed visit every three days -> 14.12 | 664 // - a typed visit every three days -> 14.12 |
641 // - at least ten typed visits today -> 20.0 (maximum score) | 665 // - at least ten typed visits today -> 20.0 (maximum score) |
642 // | 666 // |
643 // The below code maps intermediate_score to the range [0, 1399]. | 667 // The below code maps intermediate_score to the range [0, 1399]. |
644 // For example: | 668 // For example: |
645 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" | 669 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" |
646 // We will linearly interpolate the scores between: | 670 // We will linearly interpolate the scores between: |
647 // 0 to 1.5 --> 400 to 600 | 671 // 0 to 1.5 --> 400 to 600 |
648 // 1.5 to 12.0 --> 600 to 1300 | 672 // 1.5 to 12.0 --> 600 to 1300 |
649 // 12.0 to 20.0 --> 1300 to 1399 | 673 // 12.0 to 20.0 --> 1300 to 1399 |
650 // >= 20.0 --> 1399 | 674 // >= 20.0 --> 1399 |
651 // | 675 // |
652 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result | 676 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result |
653 // from HistoryURL provider). | 677 // from HistoryURL provider). |
654 const float intermediate_score = topicality_score * frequency_score; | 678 const float intermediate_score = |
679 topicality_score * frequency_score * specificity_score; | |
655 | 680 |
656 // Find the threshold where intermediate score is greater than bucket. | 681 // Find the threshold where intermediate score is greater than bucket. |
657 size_t i = 1; | 682 size_t i = 1; |
658 for (; i < hqp_relevance_buckets.size(); ++i) { | 683 for (; i < hqp_relevance_buckets.size(); ++i) { |
659 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i]; | 684 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i]; |
660 if (intermediate_score >= hqp_bucket.first) { | 685 if (intermediate_score >= hqp_bucket.first) { |
661 continue; | 686 continue; |
662 } | 687 } |
663 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1]; | 688 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1]; |
664 const float slope = ((hqp_bucket.second - previous_bucket.second) / | 689 const float slope = ((hqp_bucket.second - previous_bucket.second) / |
(...skipping 56 matching lines...) | |
721 base::StringToDouble(it->first, &bucket.first); | 746 base::StringToDouble(it->first, &bucket.first); |
722 DCHECK(is_valid_intermediate_score); | 747 DCHECK(is_valid_intermediate_score); |
723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); | 748 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); |
724 DCHECK(is_valid_hqp_score); | 749 DCHECK(is_valid_hqp_score); |
725 hqp_buckets->push_back(bucket); | 750 hqp_buckets->push_back(bucket); |
726 } | 751 } |
727 return true; | 752 return true; |
728 } | 753 } |
729 return false; | 754 return false; |
730 } | 755 } |
OLD | NEW |