| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
| 6 | 6 |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <vector> | 10 #include <vector> |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 110 float ScoredHistoryMatch::typed_value_; | 110 float ScoredHistoryMatch::typed_value_; |
| 111 bool ScoredHistoryMatch::fix_few_visits_bug_; | 111 bool ScoredHistoryMatch::fix_few_visits_bug_; |
| 112 bool ScoredHistoryMatch::frequency_uses_sum_; | 112 bool ScoredHistoryMatch::frequency_uses_sum_; |
| 113 size_t ScoredHistoryMatch::max_visits_to_score_; | 113 size_t ScoredHistoryMatch::max_visits_to_score_; |
| 114 bool ScoredHistoryMatch::allow_tld_matches_; | 114 bool ScoredHistoryMatch::allow_tld_matches_; |
| 115 bool ScoredHistoryMatch::allow_scheme_matches_; | 115 bool ScoredHistoryMatch::allow_scheme_matches_; |
| 116 size_t ScoredHistoryMatch::num_title_words_to_allow_; | 116 size_t ScoredHistoryMatch::num_title_words_to_allow_; |
| 117 float ScoredHistoryMatch::topicality_threshold_; | 117 float ScoredHistoryMatch::topicality_threshold_; |
| 118 ScoredHistoryMatch::ScoreMaxRelevances* | 118 ScoredHistoryMatch::ScoreMaxRelevances* |
| 119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr; | 119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr; |
| 120 OmniboxFieldTrial::NumMatchesScores* |
| 121 ScoredHistoryMatch::matches_to_specificity_override_ = nullptr; |
| 120 | 122 |
| 121 ScoredHistoryMatch::ScoredHistoryMatch() | 123 ScoredHistoryMatch::ScoredHistoryMatch() |
| 122 : ScoredHistoryMatch(history::URLRow(), | 124 : ScoredHistoryMatch(history::URLRow(), |
| 123 VisitInfoVector(), | 125 VisitInfoVector(), |
| 124 base::string16(), | 126 base::string16(), |
| 125 String16Vector(), | 127 String16Vector(), |
| 126 WordStarts(), | 128 WordStarts(), |
| 127 RowWordStarts(), | 129 RowWordStarts(), |
| 128 false, | 130 false, |
| 129 base::Time::Max()) { | 131 1, |
| 130 } | 132 base::Time::Max()) {} |
| 131 | 133 |
| 132 ScoredHistoryMatch::ScoredHistoryMatch( | 134 ScoredHistoryMatch::ScoredHistoryMatch( |
| 133 const history::URLRow& row, | 135 const history::URLRow& row, |
| 134 const VisitInfoVector& visits, | 136 const VisitInfoVector& visits, |
| 135 const base::string16& lower_string, | 137 const base::string16& lower_string, |
| 136 const String16Vector& terms_vector, | 138 const String16Vector& terms_vector, |
| 137 const WordStarts& terms_to_word_starts_offsets, | 139 const WordStarts& terms_to_word_starts_offsets, |
| 138 const RowWordStarts& word_starts, | 140 const RowWordStarts& word_starts, |
| 139 bool is_url_bookmarked, | 141 bool is_url_bookmarked, |
| 142 size_t num_matching_pages, |
| 140 base::Time now) | 143 base::Time now) |
| 141 : HistoryMatch(row, 0, false, false), raw_score(0) { | 144 : HistoryMatch(row, 0, false, false), raw_score(0) { |
| 142 // NOTE: Call Init() before doing any validity checking to ensure that the | 145 // NOTE: Call Init() before doing any validity checking to ensure that the |
| 143 // class is always initialized after an instance has been constructed. In | 146 // class is always initialized after an instance has been constructed. In |
| 144 // particular, this ensures that the class is initialized after an instance | 147 // particular, this ensures that the class is initialized after an instance |
| 145 // has been constructed via the no-args constructor. | 148 // has been constructed via the no-args constructor. |
| 146 ScoredHistoryMatch::Init(); | 149 ScoredHistoryMatch::Init(); |
| 147 | 150 |
| 148 // Figure out where each search term appears in the URL and/or page title | 151 // Figure out where each search term appears in the URL and/or page title |
| 149 // so that we can score as well as provide autocomplete highlighting. | 152 // so that we can score as well as provide autocomplete highlighting. |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 247 likely_can_inline = true; | 250 likely_can_inline = true; |
| 248 innermost_match = (best_inlineable_prefix->num_components == | 251 innermost_match = (best_inlineable_prefix->num_components == |
| 249 best_prefix->num_components); | 252 best_prefix->num_components); |
| 250 } | 253 } |
| 251 } | 254 } |
| 252 } | 255 } |
| 253 | 256 |
| 254 const float topicality_score = GetTopicalityScore( | 257 const float topicality_score = GetTopicalityScore( |
| 255 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); | 258 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
| 256 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); | 259 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); |
| 257 raw_score = base::saturated_cast<int>( | 260 const float specificity_score = |
| 258 GetFinalRelevancyScore(topicality_score, frequency_score)); | 261 GetDocumentSpecificityScore(num_matching_pages); |
| 262 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( |
| 263 topicality_score, frequency_score, specificity_score)); |
| 259 | 264 |
| 260 if (also_do_hup_like_scoring_ && likely_can_inline) { | 265 if (also_do_hup_like_scoring_ && likely_can_inline) { |
| 261 // HistoryURL-provider-like scoring gives any match that is | 266 // HistoryURL-provider-like scoring gives any match that is |
| 262 // capable of being inlined a certain minimum score. Some of these | 267 // capable of being inlined a certain minimum score. Some of these |
| 263 // are given a higher score that lets them be shown in inline. | 268 // are given a higher score that lets them be shown in inline. |
| 264 // This test here derives from the test in | 269 // This test here derives from the test in |
| 265 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). | 270 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). |
| 266 const bool promote_to_inline = | 271 const bool promote_to_inline = |
| 267 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); | 272 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); |
| 268 int hup_like_score = | 273 int hup_like_score = |
| (...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 603 // Compute the average weighted value_of_transition and return it. | 608 // Compute the average weighted value_of_transition and return it. |
| 604 // Use |max_visits_to_score_| as the denominator for the average regardless of | 609 // Use |max_visits_to_score_| as the denominator for the average regardless of |
| 605 // how many visits there were in order to penalize a match that has | 610 // how many visits there were in order to penalize a match that has |
| 606 // fewer visits than kMaxVisitsToScore. | 611 // fewer visits than kMaxVisitsToScore. |
| 607 if (fix_few_visits_bug_) | 612 if (fix_few_visits_bug_) |
| 608 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; | 613 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; |
| 609 return visits.size() * summed_visit_points / | 614 return visits.size() * summed_visit_points / |
| 610 ScoredHistoryMatch::max_visits_to_score_; | 615 ScoredHistoryMatch::max_visits_to_score_; |
| 611 } | 616 } |
| 612 | 617 |
| 618 float ScoredHistoryMatch::GetDocumentSpecificityScore( |
| 619 size_t num_matching_pages) const { |
| 620 // A mapping from the number of matching pages to their associated document |
| 621 // specificity scores. See omnibox_field_trial.h for more details. |
| 622 CR_DEFINE_STATIC_LOCAL(OmniboxFieldTrial::NumMatchesScores, |
| 623 default_matches_to_specificity, |
| 624 (OmniboxFieldTrial::HQPNumMatchesScores())); |
| 625 OmniboxFieldTrial::NumMatchesScores* matches_to_specificity = |
| 626 matches_to_specificity_override_ ? matches_to_specificity_override_ |
| 627 : &default_matches_to_specificity; |
| 628 |
| 629 // The floating point value below must be less than the lowest score the |
| 630 // server would send down. |
| 631 OmniboxFieldTrial::NumMatchesScores::const_iterator it = std::upper_bound( |
| 632 matches_to_specificity->begin(), matches_to_specificity->end(), |
| 633 std::pair<size_t, double>{num_matching_pages, -1}); |
| 634 return (it != matches_to_specificity->end()) ? it->second : 1.0; |
| 635 }; |
| 636 |
| 613 // static | 637 // static |
| 614 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, | 638 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
| 615 float frequency_score) { | 639 float frequency_score, |
| 640 float specificity_score) { |
| 616 // |relevance_buckets| gives a mapping from intemerdiate score to the final | 641 // |relevance_buckets| gives a mapping from intemerdiate score to the final |
| 617 // relevance score. | 642 // relevance score. |
| 618 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets, | 643 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets, |
| 619 (GetHQPBuckets())); | 644 (GetHQPBuckets())); |
| 620 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_ | 645 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_ |
| 621 ? relevance_buckets_override_ | 646 ? relevance_buckets_override_ |
| 622 : &default_relevance_buckets; | 647 : &default_relevance_buckets; |
| 623 DCHECK(!relevance_buckets->empty()); | 648 DCHECK(!relevance_buckets->empty()); |
| 624 DCHECK_EQ(0.0, (*relevance_buckets)[0].first); | 649 DCHECK_EQ(0.0, (*relevance_buckets)[0].first); |
| 625 | 650 |
| 626 if (topicality_score == 0) | 651 if (topicality_score == 0) |
| 627 return 0; | 652 return 0; |
| 628 // Here's how to interpret intermediate_score: Suppose the omnibox | 653 // Here's how to interpret intermediate_score: Suppose the omnibox has one |
| 629 // has one input term. Suppose we have a URL for which the omnibox | 654 // input term. Suppose the input matches many documents. (This implies |
| 655 // specificity_score == 1.0.) Suppose we have a URL for which the omnibox |
| 630 // input term has a single URL hostname hit at a word boundary. (This | 656 // input term has a single URL hostname hit at a word boundary. (This |
| 631 // implies topicality_score = 1.0.). Then the intermediate_score for | 657 // implies topicality_score = 1.0.). Then the intermediate_score for |
| 632 // this URL will depend entirely on the frequency_score with | 658 // this URL will depend entirely on the frequency_score with |
| 633 // this interpretation: | 659 // this interpretation: |
| 634 // - a single typed visit more than three months ago, no other visits -> 0.2 | 660 // - a single typed visit more than three months ago, no other visits -> 0.2 |
| 635 // - a visit every three days, no typed visits -> 0.706 | 661 // - a visit every three days, no typed visits -> 0.706 |
| 636 // - a visit every day, no typed visits -> 0.916 | 662 // - a visit every day, no typed visits -> 0.916 |
| 637 // - a single typed visit yesterday, no other visits -> 2.0 | 663 // - a single typed visit yesterday, no other visits -> 2.0 |
| 638 // - a typed visit once a week -> 11.77 | 664 // - a typed visit once a week -> 11.77 |
| 639 // - a typed visit every three days -> 14.12 | 665 // - a typed visit every three days -> 14.12 |
| 640 // - at least ten typed visits today -> 20.0 (maximum score) | 666 // - at least ten typed visits today -> 20.0 (maximum score) |
| 641 // | 667 // |
| 642 // The below code maps intermediate_score to the range [0, 1399]. | 668 // The below code maps intermediate_score to the range [0, 1399]. |
| 643 // For example: | 669 // For example: |
| 644 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" | 670 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" |
| 645 // We will linearly interpolate the scores between: | 671 // We will linearly interpolate the scores between: |
| 646 // 0 to 1.5 --> 400 to 600 | 672 // 0 to 1.5 --> 400 to 600 |
| 647 // 1.5 to 12.0 --> 600 to 1300 | 673 // 1.5 to 12.0 --> 600 to 1300 |
| 648 // 12.0 to 20.0 --> 1300 to 1399 | 674 // 12.0 to 20.0 --> 1300 to 1399 |
| 649 // >= 20.0 --> 1399 | 675 // >= 20.0 --> 1399 |
| 650 // | 676 // |
| 651 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result | 677 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result |
| 652 // from HistoryURL provider). | 678 // from HistoryURL provider). |
| 653 const float intermediate_score = topicality_score * frequency_score; | 679 const float intermediate_score = |
| 680 topicality_score * frequency_score * specificity_score; |
| 654 | 681 |
| 655 // Find the threshold where intermediate score is greater than bucket. | 682 // Find the threshold where intermediate score is greater than bucket. |
| 656 size_t i = 1; | 683 size_t i = 1; |
| 657 for (; i < relevance_buckets->size(); ++i) { | 684 for (; i < relevance_buckets->size(); ++i) { |
| 658 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i]; | 685 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i]; |
| 659 if (intermediate_score >= bucket.first) { | 686 if (intermediate_score >= bucket.first) { |
| 660 continue; | 687 continue; |
| 661 } | 688 } |
| 662 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1]; | 689 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1]; |
| 663 const float slope = ((bucket.second - previous_bucket.second) / | 690 const float slope = ((bucket.second - previous_bucket.second) / |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 696 ScoreMaxRelevance bucket; | 723 ScoreMaxRelevance bucket; |
| 697 bool is_valid_intermediate_score = | 724 bool is_valid_intermediate_score = |
| 698 base::StringToDouble(it->first, &bucket.first); | 725 base::StringToDouble(it->first, &bucket.first); |
| 699 DCHECK(is_valid_intermediate_score); | 726 DCHECK(is_valid_intermediate_score); |
| 700 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); | 727 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); |
| 701 DCHECK(is_valid_hqp_score); | 728 DCHECK(is_valid_hqp_score); |
| 702 hqp_buckets.push_back(bucket); | 729 hqp_buckets.push_back(bucket); |
| 703 } | 730 } |
| 704 return hqp_buckets; | 731 return hqp_buckets; |
| 705 } | 732 } |
| OLD | NEW |