OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
6 | 6 |
7 #include <math.h> | 7 #include <math.h> |
8 | 8 |
9 #include <algorithm> | 9 #include <algorithm> |
10 #include <vector> | 10 #include <vector> |
11 | 11 |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/macros.h" |
13 #include "base/numerics/safe_conversions.h" | 14 #include "base/numerics/safe_conversions.h" |
14 #include "base/strings/string_number_conversions.h" | 15 #include "base/strings/string_number_conversions.h" |
15 #include "base/strings/string_split.h" | 16 #include "base/strings/string_split.h" |
16 #include "base/strings/string_util.h" | 17 #include "base/strings/string_util.h" |
17 #include "base/strings/utf_offset_string_conversions.h" | 18 #include "base/strings/utf_offset_string_conversions.h" |
18 #include "base/strings/utf_string_conversions.h" | 19 #include "base/strings/utf_string_conversions.h" |
19 #include "components/bookmarks/browser/bookmark_utils.h" | 20 #include "components/bookmarks/browser/bookmark_utils.h" |
20 #include "components/omnibox/browser/history_url_provider.h" | 21 #include "components/omnibox/browser/history_url_provider.h" |
21 #include "components/omnibox/browser/omnibox_field_trial.h" | 22 #include "components/omnibox/browser/omnibox_field_trial.h" |
22 #include "components/omnibox/browser/url_prefix.h" | 23 #include "components/omnibox/browser/url_prefix.h" |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
106 // static | 107 // static |
107 bool ScoredHistoryMatch::also_do_hup_like_scoring_; | 108 bool ScoredHistoryMatch::also_do_hup_like_scoring_; |
108 float ScoredHistoryMatch::bookmark_value_; | 109 float ScoredHistoryMatch::bookmark_value_; |
109 float ScoredHistoryMatch::typed_value_; | 110 float ScoredHistoryMatch::typed_value_; |
110 bool ScoredHistoryMatch::fix_few_visits_bug_; | 111 bool ScoredHistoryMatch::fix_few_visits_bug_; |
111 bool ScoredHistoryMatch::frequency_uses_sum_; | 112 bool ScoredHistoryMatch::frequency_uses_sum_; |
112 size_t ScoredHistoryMatch::max_visits_to_score_; | 113 size_t ScoredHistoryMatch::max_visits_to_score_; |
113 bool ScoredHistoryMatch::allow_tld_matches_; | 114 bool ScoredHistoryMatch::allow_tld_matches_; |
114 bool ScoredHistoryMatch::allow_scheme_matches_; | 115 bool ScoredHistoryMatch::allow_scheme_matches_; |
115 size_t ScoredHistoryMatch::num_title_words_to_allow_; | 116 size_t ScoredHistoryMatch::num_title_words_to_allow_; |
116 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_; | 117 float ScoredHistoryMatch::topicality_threshold_; |
117 | 118 ScoredHistoryMatch::ScoreMaxRelevances* |
118 // Default topicality threshold. See GetTopicalityScore() for details. | 119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr; |
119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; | |
120 | |
121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details | |
122 // on these numbers. | |
123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = | |
124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; | |
125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* | |
126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; | |
127 | 120 |
128 ScoredHistoryMatch::ScoredHistoryMatch() | 121 ScoredHistoryMatch::ScoredHistoryMatch() |
129 : ScoredHistoryMatch(history::URLRow(), | 122 : ScoredHistoryMatch(history::URLRow(), |
130 VisitInfoVector(), | 123 VisitInfoVector(), |
131 base::string16(), | 124 base::string16(), |
132 String16Vector(), | 125 String16Vector(), |
133 WordStarts(), | 126 WordStarts(), |
134 RowWordStarts(), | 127 RowWordStarts(), |
135 false, | 128 false, |
136 base::Time::Max()) { | 129 base::Time::Max()) { |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
254 likely_can_inline = true; | 247 likely_can_inline = true; |
255 innermost_match = (best_inlineable_prefix->num_components == | 248 innermost_match = (best_inlineable_prefix->num_components == |
256 best_prefix->num_components); | 249 best_prefix->num_components); |
257 } | 250 } |
258 } | 251 } |
259 } | 252 } |
260 | 253 |
261 const float topicality_score = GetTopicalityScore( | 254 const float topicality_score = GetTopicalityScore( |
262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); | 255 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); | 256 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); |
264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( | 257 raw_score = base::saturated_cast<int>( |
265 topicality_score, frequency_score, *hqp_relevance_buckets_)); | 258 GetFinalRelevancyScore(topicality_score, frequency_score)); |
266 | 259 |
267 if (also_do_hup_like_scoring_ && likely_can_inline) { | 260 if (also_do_hup_like_scoring_ && likely_can_inline) { |
268 // HistoryURL-provider-like scoring gives any match that is | 261 // HistoryURL-provider-like scoring gives any match that is |
269 // capable of being inlined a certain minimum score. Some of these | 262 // capable of being inlined a certain minimum score. Some of these |
270 // are given a higher score that lets them be shown in inline. | 263 // are given a higher score that lets them be shown in inline. |
271 // This test here derives from the test in | 264 // This test here derives from the test in |
272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). | 265 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). |
273 const bool promote_to_inline = | 266 const bool promote_to_inline = |
274 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); | 267 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); |
275 int hup_like_score = | 268 int hup_like_score = |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
408 initialized = true; | 401 initialized = true; |
409 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); | 402 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); |
410 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); | 403 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
411 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); | 404 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); |
412 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); | 405 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); |
413 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); | 406 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); |
414 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); | 407 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); |
415 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); | 408 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
416 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); | 409 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
417 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); | 410 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); |
| 411 topicality_threshold_ = |
| 412 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold(); |
418 | 413 |
419 InitRawTermScoreToTopicalityScoreArray(); | 414 InitRawTermScoreToTopicalityScoreArray(); |
420 InitDaysAgoToRecencyScoreArray(); | 415 InitDaysAgoToRecencyScoreArray(); |
421 InitHQPExperimentalParams(); | |
422 } | 416 } |
423 | 417 |
424 float ScoredHistoryMatch::GetTopicalityScore( | 418 float ScoredHistoryMatch::GetTopicalityScore( |
425 const int num_terms, | 419 const int num_terms, |
426 const base::string16& url, | 420 const base::string16& url, |
427 const WordStarts& terms_to_word_starts_offsets, | 421 const WordStarts& terms_to_word_starts_offsets, |
428 const RowWordStarts& word_starts) { | 422 const RowWordStarts& word_starts) { |
429 // A vector that accumulates per-term scores. The strongest match--a | 423 // A vector that accumulates per-term scores. The strongest match--a |
430 // match in the hostname at a word boundary--is worth 10 points. | 424 // match in the hostname at a word boundary--is worth 10 points. |
431 // Everything else is less. In general, a match that's not at a word | 425 // Everything else is less. In general, a match that's not at a word |
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
610 // Use |max_visits_to_score_| as the denominator for the average regardless of | 604 // Use |max_visits_to_score_| as the denominator for the average regardless of |
611 // how many visits there were in order to penalize a match that has | 605 // how many visits there were in order to penalize a match that has |
612 // fewer visits than kMaxVisitsToScore. | 606 // fewer visits than kMaxVisitsToScore. |
613 if (fix_few_visits_bug_) | 607 if (fix_few_visits_bug_) |
614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; | 608 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; |
615 return visits.size() * summed_visit_points / | 609 return visits.size() * summed_visit_points / |
616 ScoredHistoryMatch::max_visits_to_score_; | 610 ScoredHistoryMatch::max_visits_to_score_; |
617 } | 611 } |
618 | 612 |
619 // static | 613 // static |
620 float ScoredHistoryMatch::GetFinalRelevancyScore( | 614 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
621 float topicality_score, | 615 float frequency_score) { |
622 float frequency_score, | 616 // |relevance_buckets| gives a mapping from intemerdiate score to the final |
623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { | 617 // relevance score. |
624 DCHECK(hqp_relevance_buckets.size() > 0); | 618 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets, |
625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); | 619 (GetHQPBuckets())); |
| 620 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_ |
| 621 ? relevance_buckets_override_ |
| 622 : &default_relevance_buckets; |
| 623 DCHECK(!relevance_buckets->empty()); |
| 624 DCHECK_EQ(0.0, (*relevance_buckets)[0].first); |
626 | 625 |
627 if (topicality_score == 0) | 626 if (topicality_score == 0) |
628 return 0; | 627 return 0; |
629 // Here's how to interpret intermediate_score: Suppose the omnibox | 628 // Here's how to interpret intermediate_score: Suppose the omnibox |
630 // has one input term. Suppose we have a URL for which the omnibox | 629 // has one input term. Suppose we have a URL for which the omnibox |
631 // input term has a single URL hostname hit at a word boundary. (This | 630 // input term has a single URL hostname hit at a word boundary. (This |
632 // implies topicality_score = 1.0.). Then the intermediate_score for | 631 // implies topicality_score = 1.0.). Then the intermediate_score for |
633 // this URL will depend entirely on the frequency_score with | 632 // this URL will depend entirely on the frequency_score with |
634 // this interpretation: | 633 // this interpretation: |
635 // - a single typed visit more than three months ago, no other visits -> 0.2 | 634 // - a single typed visit more than three months ago, no other visits -> 0.2 |
636 // - a visit every three days, no typed visits -> 0.706 | 635 // - a visit every three days, no typed visits -> 0.706 |
637 // - a visit every day, no typed visits -> 0.916 | 636 // - a visit every day, no typed visits -> 0.916 |
638 // - a single typed visit yesterday, no other visits -> 2.0 | 637 // - a single typed visit yesterday, no other visits -> 2.0 |
639 // - a typed visit once a week -> 11.77 | 638 // - a typed visit once a week -> 11.77 |
640 // - a typed visit every three days -> 14.12 | 639 // - a typed visit every three days -> 14.12 |
641 // - at least ten typed visits today -> 20.0 (maximum score) | 640 // - at least ten typed visits today -> 20.0 (maximum score) |
642 // | 641 // |
643 // The below code maps intermediate_score to the range [0, 1399]. | 642 // The below code maps intermediate_score to the range [0, 1399]. |
644 // For example: | 643 // For example: |
645 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" | 644 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" |
646 // We will linearly interpolate the scores between: | 645 // We will linearly interpolate the scores between: |
647 // 0 to 1.5 --> 400 to 600 | 646 // 0 to 1.5 --> 400 to 600 |
648 // 1.5 to 12.0 --> 600 to 1300 | 647 // 1.5 to 12.0 --> 600 to 1300 |
649 // 12.0 to 20.0 --> 1300 to 1399 | 648 // 12.0 to 20.0 --> 1300 to 1399 |
650 // >= 20.0 --> 1399 | 649 // >= 20.0 --> 1399 |
651 // | 650 // |
652 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result | 651 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result |
653 // from HistoryURL provider). | 652 // from HistoryURL provider). |
654 const float intermediate_score = topicality_score * frequency_score; | 653 const float intermediate_score = topicality_score * frequency_score; |
655 | 654 |
656 // Find the threshold where intermediate score is greater than bucket. | 655 // Find the threshold where intermediate score is greater than bucket. |
657 size_t i = 1; | 656 size_t i = 1; |
658 for (; i < hqp_relevance_buckets.size(); ++i) { | 657 for (; i < relevance_buckets->size(); ++i) { |
659 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i]; | 658 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i]; |
660 if (intermediate_score >= hqp_bucket.first) { | 659 if (intermediate_score >= bucket.first) { |
661 continue; | 660 continue; |
662 } | 661 } |
663 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1]; | 662 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1]; |
664 const float slope = ((hqp_bucket.second - previous_bucket.second) / | 663 const float slope = ((bucket.second - previous_bucket.second) / |
665 (hqp_bucket.first - previous_bucket.first)); | 664 (bucket.first - previous_bucket.first)); |
666 return (previous_bucket.second + | 665 return (previous_bucket.second + |
667 (slope * (intermediate_score - previous_bucket.first))); | 666 (slope * (intermediate_score - previous_bucket.first))); |
668 } | 667 } |
669 // It will reach this stage when the score is > highest bucket score. | 668 // It will reach this stage when the score is > highest bucket score. |
670 // Return the highest bucket score. | 669 // Return the highest bucket score. |
671 return hqp_relevance_buckets[i - 1].second; | 670 return (*relevance_buckets)[i - 1].second; |
672 } | 671 } |
673 | 672 |
674 // static | 673 // static |
675 void ScoredHistoryMatch::InitHQPExperimentalParams() { | 674 std::vector<ScoredHistoryMatch::ScoreMaxRelevance> |
676 // These are default HQP relevance scoring buckets. | 675 ScoredHistoryMatch::GetHQPBuckets() { |
677 // See GetFinalRelevancyScore() for details. | 676 // Start with the default buckets and override them if appropriate. |
678 std::string hqp_relevance_buckets_str = std::string( | 677 std::string relevance_buckets_str = |
679 hqp_relevance_buckets_str_); | 678 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; |
680 | 679 std::string experimental_scoring_buckets = |
681 // Fetch the experiment params if they are any. | 680 OmniboxFieldTrial::HQPExperimentalScoringBuckets(); |
682 hqp_experimental_scoring_enabled_ = | 681 if (!experimental_scoring_buckets.empty()) |
683 OmniboxFieldTrial::HQPExperimentalScoringEnabled(); | 682 relevance_buckets_str = experimental_scoring_buckets; |
684 | 683 return GetHQPBucketsFromString(relevance_buckets_str); |
685 if (hqp_experimental_scoring_enabled_) { | |
686 // Add the topicality threshold from experiment params. | |
687 float hqp_experimental_topicality_threhold = | |
688 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold(); | |
689 topicality_threshold_ = hqp_experimental_topicality_threhold; | |
690 | |
691 // Add the HQP experimental scoring buckets. | |
692 std::string hqp_experimental_scoring_buckets = | |
693 OmniboxFieldTrial::HQPExperimentalScoringBuckets(); | |
694 if (!hqp_experimental_scoring_buckets.empty()) | |
695 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets; | |
696 } | |
697 | |
698 // Parse the hqp_relevance_buckets_str string once and store them in vector | |
699 // which is easy to access. | |
700 hqp_relevance_buckets_ = | |
701 new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>(); | |
702 | |
703 bool is_valid_bucket_str = GetHQPBucketsFromString(hqp_relevance_buckets_str, | |
704 hqp_relevance_buckets_); | |
705 DCHECK(is_valid_bucket_str); | |
706 } | 684 } |
707 | 685 |
708 // static | 686 // static |
709 bool ScoredHistoryMatch::GetHQPBucketsFromString( | 687 ScoredHistoryMatch::ScoreMaxRelevances |
710 const std::string& buckets_str, | 688 ScoredHistoryMatch::GetHQPBucketsFromString(const std::string& buckets_str) { |
711 std::vector<ScoreMaxRelevance>* hqp_buckets) { | |
712 DCHECK(hqp_buckets != NULL); | |
713 DCHECK(!buckets_str.empty()); | 689 DCHECK(!buckets_str.empty()); |
714 | |
715 base::StringPairs kv_pairs; | 690 base::StringPairs kv_pairs; |
716 if (base::SplitStringIntoKeyValuePairs(buckets_str, ':', ',', &kv_pairs)) { | 691 if (!base::SplitStringIntoKeyValuePairs(buckets_str, ':', ',', &kv_pairs)) |
717 for (base::StringPairs::const_iterator it = kv_pairs.begin(); | 692 return ScoreMaxRelevances(); |
718 it != kv_pairs.end(); ++it) { | 693 ScoreMaxRelevances hqp_buckets; |
719 ScoreMaxRelevance bucket; | 694 for (base::StringPairs::const_iterator it = kv_pairs.begin(); |
720 bool is_valid_intermediate_score = | 695 it != kv_pairs.end(); ++it) { |
721 base::StringToDouble(it->first, &bucket.first); | 696 ScoreMaxRelevance bucket; |
722 DCHECK(is_valid_intermediate_score); | 697 bool is_valid_intermediate_score = |
723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); | 698 base::StringToDouble(it->first, &bucket.first); |
724 DCHECK(is_valid_hqp_score); | 699 DCHECK(is_valid_intermediate_score); |
725 hqp_buckets->push_back(bucket); | 700 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); |
726 } | 701 DCHECK(is_valid_hqp_score); |
727 return true; | 702 hqp_buckets.push_back(bucket); |
728 } | 703 } |
729 return false; | 704 return hqp_buckets; |
730 } | 705 } |
OLD | NEW |