Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
| 6 | 6 |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <vector> | 10 #include <vector> |
| 11 | 11 |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/macros.h" | |
| 13 #include "base/numerics/safe_conversions.h" | 14 #include "base/numerics/safe_conversions.h" |
| 14 #include "base/strings/string_number_conversions.h" | 15 #include "base/strings/string_number_conversions.h" |
| 15 #include "base/strings/string_split.h" | 16 #include "base/strings/string_split.h" |
| 16 #include "base/strings/string_util.h" | 17 #include "base/strings/string_util.h" |
| 17 #include "base/strings/utf_offset_string_conversions.h" | 18 #include "base/strings/utf_offset_string_conversions.h" |
| 18 #include "base/strings/utf_string_conversions.h" | 19 #include "base/strings/utf_string_conversions.h" |
| 19 #include "components/bookmarks/browser/bookmark_utils.h" | 20 #include "components/bookmarks/browser/bookmark_utils.h" |
| 20 #include "components/omnibox/browser/history_url_provider.h" | 21 #include "components/omnibox/browser/history_url_provider.h" |
| 21 #include "components/omnibox/browser/omnibox_field_trial.h" | 22 #include "components/omnibox/browser/omnibox_field_trial.h" |
| 22 #include "components/omnibox/browser/url_prefix.h" | 23 #include "components/omnibox/browser/url_prefix.h" |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 118 // Default topicality threshold. See GetTopicalityScore() for details. | 119 // Default topicality threshold. See GetTopicalityScore() for details. |
| 119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; | 120 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; |
| 120 | 121 |
| 121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details | 122 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details |
| 122 // on these numbers. | 123 // on these numbers. |
| 123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = | 124 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = |
| 124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; | 125 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; |
| 125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* | 126 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* |
| 126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; | 127 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; |
| 127 | 128 |
| 129 OmniboxFieldTrial::NumMatchesScores* | |
| 130 ScoredHistoryMatch::num_matches_to_document_specificity_score_override_ = | |
| 131 nullptr; | |
| 132 | |
| 128 ScoredHistoryMatch::ScoredHistoryMatch() | 133 ScoredHistoryMatch::ScoredHistoryMatch() |
| 129 : ScoredHistoryMatch(history::URLRow(), | 134 : ScoredHistoryMatch(history::URLRow(), |
| 130 VisitInfoVector(), | 135 VisitInfoVector(), |
| 131 base::string16(), | 136 base::string16(), |
| 132 String16Vector(), | 137 String16Vector(), |
| 133 WordStarts(), | 138 WordStarts(), |
| 134 RowWordStarts(), | 139 RowWordStarts(), |
| 135 false, | 140 false, |
| 136 base::Time::Max()) { | 141 1, |
| 137 } | 142 base::Time::Max()) {} |
| 138 | 143 |
| 139 ScoredHistoryMatch::ScoredHistoryMatch( | 144 ScoredHistoryMatch::ScoredHistoryMatch( |
| 140 const history::URLRow& row, | 145 const history::URLRow& row, |
| 141 const VisitInfoVector& visits, | 146 const VisitInfoVector& visits, |
| 142 const base::string16& lower_string, | 147 const base::string16& lower_string, |
| 143 const String16Vector& terms_vector, | 148 const String16Vector& terms_vector, |
| 144 const WordStarts& terms_to_word_starts_offsets, | 149 const WordStarts& terms_to_word_starts_offsets, |
| 145 const RowWordStarts& word_starts, | 150 const RowWordStarts& word_starts, |
| 146 bool is_url_bookmarked, | 151 bool is_url_bookmarked, |
| 152 size_t num_matching_pages, | |
| 147 base::Time now) | 153 base::Time now) |
| 148 : HistoryMatch(row, 0, false, false), raw_score(0) { | 154 : HistoryMatch(row, 0, false, false), raw_score(0) { |
| 149 // NOTE: Call Init() before doing any validity checking to ensure that the | 155 // NOTE: Call Init() before doing any validity checking to ensure that the |
| 150 // class is always initialized after an instance has been constructed. In | 156 // class is always initialized after an instance has been constructed. In |
| 151 // particular, this ensures that the class is initialized after an instance | 157 // particular, this ensures that the class is initialized after an instance |
| 152 // has been constructed via the no-args constructor. | 158 // has been constructed via the no-args constructor. |
| 153 ScoredHistoryMatch::Init(); | 159 ScoredHistoryMatch::Init(); |
| 154 | 160 |
| 155 // Figure out where each search term appears in the URL and/or page title | 161 // Figure out where each search term appears in the URL and/or page title |
| 156 // so that we can score as well as provide autocomplete highlighting. | 162 // so that we can score as well as provide autocomplete highlighting. |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 254 likely_can_inline = true; | 260 likely_can_inline = true; |
| 255 innermost_match = (best_inlineable_prefix->num_components == | 261 innermost_match = (best_inlineable_prefix->num_components == |
| 256 best_prefix->num_components); | 262 best_prefix->num_components); |
| 257 } | 263 } |
| 258 } | 264 } |
| 259 } | 265 } |
| 260 | 266 |
| 261 const float topicality_score = GetTopicalityScore( | 267 const float topicality_score = GetTopicalityScore( |
| 262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); | 268 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
| 263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); | 269 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); |
| 264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( | 270 const float specificity_score = |
| 265 topicality_score, frequency_score, *hqp_relevance_buckets_)); | 271 GetDocumentSpecificityScore(num_matching_pages); |
| 272 raw_score = base::saturated_cast<int>( | |
| 273 GetFinalRelevancyScore(topicality_score, frequency_score, | |
| 274 specificity_score, *hqp_relevance_buckets_)); | |
| 266 | 275 |
| 267 if (also_do_hup_like_scoring_ && likely_can_inline) { | 276 if (also_do_hup_like_scoring_ && likely_can_inline) { |
| 268 // HistoryURL-provider-like scoring gives any match that is | 277 // HistoryURL-provider-like scoring gives any match that is |
| 269 // capable of being inlined a certain minimum score. Some of these | 278 // capable of being inlined a certain minimum score. Some of these |
| 270 // are given a higher score that lets them be shown in inline. | 279 // are given a higher score that lets them be shown in inline. |
| 271 // This test here derives from the test in | 280 // This test here derives from the test in |
| 272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). | 281 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). |
| 273 const bool promote_to_inline = | 282 const bool promote_to_inline = |
| 274 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); | 283 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); |
| 275 int hup_like_score = | 284 int hup_like_score = |
| (...skipping 333 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 609 // Compute the average weighted value_of_transition and return it. | 618 // Compute the average weighted value_of_transition and return it. |
| 610 // Use |max_visits_to_score_| as the denominator for the average regardless of | 619 // Use |max_visits_to_score_| as the denominator for the average regardless of |
| 611 // how many visits there were in order to penalize a match that has | 620 // how many visits there were in order to penalize a match that has |
| 612 // fewer visits than kMaxVisitsToScore. | 621 // fewer visits than kMaxVisitsToScore. |
| 613 if (fix_few_visits_bug_) | 622 if (fix_few_visits_bug_) |
| 614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; | 623 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; |
| 615 return visits.size() * summed_visit_points / | 624 return visits.size() * summed_visit_points / |
| 616 ScoredHistoryMatch::max_visits_to_score_; | 625 ScoredHistoryMatch::max_visits_to_score_; |
| 617 } | 626 } |
| 618 | 627 |
| 628 float ScoredHistoryMatch::GetDocumentSpecificityScore( | |
| 629 size_t num_matching_pages) const { | |
| 630 // A mapping from the number of matching pages to their associated document | |
| 631 // specificity scores. See omnibox_field_trial.h for more details. | |
| 632 CR_DEFINE_STATIC_LOCAL(OmniboxFieldTrial::NumMatchesScores, | |
| 633 default_num_matches_to_document_specificity_score, | |
| 634 (OmniboxFieldTrial::HQPNumMatchesScores())); | |
| 635 OmniboxFieldTrial::NumMatchesScores* | |
| 636 num_matches_to_document_specificity_score = | |
| 637 num_matches_to_document_specificity_score_override_ | |
| 638 ? num_matches_to_document_specificity_score_override_ | |
| 639 : &default_num_matches_to_document_specificity_score; | |
|
Peter Kasting
2016/12/10 02:22:26
I feel like there might be a way to shorten all th
Mark P
2016/12/11 05:11:37
I was wondering if there was a good way to shorten
| |
| 640 | |
| 641 // The floating point value below must be less than the lowest score the | |
| 642 // server would send down. | |
| 643 OmniboxFieldTrial::NumMatchesScores::const_iterator it = | |
| 644 std::upper_bound(num_matches_to_document_specificity_score->begin(), | |
| 645 num_matches_to_document_specificity_score->end(), | |
| 646 std::pair<size_t, double>{num_matching_pages, -1}); | |
| 647 return (it != num_matches_to_document_specificity_score->end()) ? it->second | |
| 648 : 1.0; | |
| 649 }; | |
| 650 | |
| 619 // static | 651 // static |
| 620 float ScoredHistoryMatch::GetFinalRelevancyScore( | 652 float ScoredHistoryMatch::GetFinalRelevancyScore( |
| 621 float topicality_score, | 653 float topicality_score, |
| 622 float frequency_score, | 654 float frequency_score, |
| 655 float specificity_score, | |
| 623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { | 656 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
| 624 DCHECK(hqp_relevance_buckets.size() > 0); | 657 DCHECK(hqp_relevance_buckets.size() > 0); |
| 625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); | 658 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); |
| 626 | 659 |
| 627 if (topicality_score == 0) | 660 if (topicality_score == 0) |
| 628 return 0; | 661 return 0; |
| 629 // Here's how to interpret intermediate_score: Suppose the omnibox | 662 // Here's how to interpret intermediate_score: Suppose the omnibox has one |
| 630 // has one input term. Suppose we have a URL for which the omnibox | 663 // input term. Suppose the input matches many documents. (This implies |
| 664 // specificity_score == 1.0.) Suppose we have a URL for which the omnibox | |
| 631 // input term has a single URL hostname hit at a word boundary. (This | 665 // input term has a single URL hostname hit at a word boundary. (This |
| 632 // implies topicality_score = 1.0.). Then the intermediate_score for | 666 // implies topicality_score = 1.0.). Then the intermediate_score for |
| 633 // this URL will depend entirely on the frequency_score with | 667 // this URL will depend entirely on the frequency_score with |
| 634 // this interpretation: | 668 // this interpretation: |
| 635 // - a single typed visit more than three months ago, no other visits -> 0.2 | 669 // - a single typed visit more than three months ago, no other visits -> 0.2 |
| 636 // - a visit every three days, no typed visits -> 0.706 | 670 // - a visit every three days, no typed visits -> 0.706 |
| 637 // - a visit every day, no typed visits -> 0.916 | 671 // - a visit every day, no typed visits -> 0.916 |
| 638 // - a single typed visit yesterday, no other visits -> 2.0 | 672 // - a single typed visit yesterday, no other visits -> 2.0 |
| 639 // - a typed visit once a week -> 11.77 | 673 // - a typed visit once a week -> 11.77 |
| 640 // - a typed visit every three days -> 14.12 | 674 // - a typed visit every three days -> 14.12 |
| 641 // - at least ten typed visits today -> 20.0 (maximum score) | 675 // - at least ten typed visits today -> 20.0 (maximum score) |
| 642 // | 676 // |
| 643 // The below code maps intermediate_score to the range [0, 1399]. | 677 // The below code maps intermediate_score to the range [0, 1399]. |
| 644 // For example: | 678 // For example: |
| 645 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" | 679 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" |
| 646 // We will linearly interpolate the scores between: | 680 // We will linearly interpolate the scores between: |
| 647 // 0 to 1.5 --> 400 to 600 | 681 // 0 to 1.5 --> 400 to 600 |
| 648 // 1.5 to 12.0 --> 600 to 1300 | 682 // 1.5 to 12.0 --> 600 to 1300 |
| 649 // 12.0 to 20.0 --> 1300 to 1399 | 683 // 12.0 to 20.0 --> 1300 to 1399 |
| 650 // >= 20.0 --> 1399 | 684 // >= 20.0 --> 1399 |
| 651 // | 685 // |
| 652 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result | 686 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result |
| 653 // from HistoryURL provider). | 687 // from HistoryURL provider). |
| 654 const float intermediate_score = topicality_score * frequency_score; | 688 const float intermediate_score = |
| 689 topicality_score * frequency_score * specificity_score; | |
| 655 | 690 |
| 656 // Find the threshold where intermediate score is greater than bucket. | 691 // Find the threshold where intermediate score is greater than bucket. |
| 657 size_t i = 1; | 692 size_t i = 1; |
| 658 for (; i < hqp_relevance_buckets.size(); ++i) { | 693 for (; i < hqp_relevance_buckets.size(); ++i) { |
| 659 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i]; | 694 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i]; |
| 660 if (intermediate_score >= hqp_bucket.first) { | 695 if (intermediate_score >= hqp_bucket.first) { |
| 661 continue; | 696 continue; |
| 662 } | 697 } |
| 663 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1]; | 698 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1]; |
| 664 const float slope = ((hqp_bucket.second - previous_bucket.second) / | 699 const float slope = ((hqp_bucket.second - previous_bucket.second) / |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 721 base::StringToDouble(it->first, &bucket.first); | 756 base::StringToDouble(it->first, &bucket.first); |
| 722 DCHECK(is_valid_intermediate_score); | 757 DCHECK(is_valid_intermediate_score); |
| 723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); | 758 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); |
| 724 DCHECK(is_valid_hqp_score); | 759 DCHECK(is_valid_hqp_score); |
| 725 hqp_buckets->push_back(bucket); | 760 hqp_buckets->push_back(bucket); |
| 726 } | 761 } |
| 727 return true; | 762 return true; |
| 728 } | 763 } |
| 729 return false; | 764 return false; |
| 730 } | 765 } |
| OLD | NEW |