Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(40)

Side by Side Diff: chrome/browser/history/scored_history_match.cc

Issue 905023003: Adding knobs on HQP provider. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressing mark comments. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/scored_history_match.h" 5 #include "chrome/browser/history/scored_history_match.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <numeric> 10 #include <numeric>
11 #include <set> 11 #include <set>
12 12
13 #include <math.h> 13 #include <math.h>
14 14
15 #include "base/logging.h" 15 #include "base/logging.h"
16 #include "base/metrics/histogram.h" 16 #include "base/metrics/histogram.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_split.h"
17 #include "base/strings/string_util.h" 19 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h" 20 #include "base/strings/utf_string_conversions.h"
19 #include "chrome/browser/autocomplete/history_url_provider.h" 21 #include "chrome/browser/autocomplete/history_url_provider.h"
20 #include "components/bookmarks/browser/bookmark_utils.h" 22 #include "components/bookmarks/browser/bookmark_utils.h"
21 #include "components/history/core/browser/history_client.h" 23 #include "components/history/core/browser/history_client.h"
22 #include "components/omnibox/omnibox_field_trial.h" 24 #include "components/omnibox/omnibox_field_trial.h"
23 #include "components/omnibox/url_prefix.h" 25 #include "components/omnibox/url_prefix.h"
24 #include "content/public/browser/browser_thread.h" 26 #include "content/public/browser/browser_thread.h"
25 27
26 namespace history { 28 namespace history {
27 29
28 // ScoredHistoryMatch ---------------------------------------------------------- 30 // ScoredHistoryMatch ----------------------------------------------------------
29 31
30 // static 32 // static
31 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10; 33 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10;
32 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366; 34 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366;
33 const int ScoredHistoryMatch::kMaxRawTermScore = 30; 35 const int ScoredHistoryMatch::kMaxRawTermScore = 30;
34 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL; 36 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL;
35 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL; 37 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL;
36 bool ScoredHistoryMatch::initialized_ = false; 38 bool ScoredHistoryMatch::initialized_ = false;
37 int ScoredHistoryMatch::bookmark_value_ = 1; 39 int ScoredHistoryMatch::bookmark_value_ = 1;
38 bool ScoredHistoryMatch::allow_tld_matches_ = false; 40 bool ScoredHistoryMatch::allow_tld_matches_ = false;
39 bool ScoredHistoryMatch::allow_scheme_matches_ = false; 41 bool ScoredHistoryMatch::allow_scheme_matches_ = false;
40 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; 42 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false;
41 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; 43 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1;
42 44
45 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_ = false;
Mark P 2015/02/14 01:27:14 (1) These static initializers should be in the ord
Ashok vardhan 2015/02/17 01:23:52 Done.
46 float ScoredHistoryMatch::topicality_threshold_ = -1;
47 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>*
48 ScoredHistoryMatch::hqp_relevance_buckets_ = NULL;
49
43 ScoredHistoryMatch::ScoredHistoryMatch() 50 ScoredHistoryMatch::ScoredHistoryMatch()
44 : raw_score_(0), 51 : raw_score_(0),
45 can_inline_(false) { 52 can_inline_(false) {
46 Init(); 53 Init();
47 } 54 }
48 55
49 ScoredHistoryMatch::ScoredHistoryMatch( 56 ScoredHistoryMatch::ScoredHistoryMatch(
50 const URLRow& row, 57 const URLRow& row,
51 const VisitInfoVector& visits, 58 const VisitInfoVector& visits,
52 const std::string& languages, 59 const std::string& languages,
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 const int num_components_in_best_inlineable_prefix = 154 const int num_components_in_best_inlineable_prefix =
148 best_inlineable_prefix->num_components; 155 best_inlineable_prefix->num_components;
149 innermost_match = (num_components_in_best_inlineable_prefix == 156 innermost_match = (num_components_in_best_inlineable_prefix ==
150 num_components_in_best_prefix); 157 num_components_in_best_prefix);
151 } 158 }
152 159
153 const float topicality_score = GetTopicalityScore( 160 const float topicality_score = GetTopicalityScore(
154 terms.size(), url, terms_to_word_starts_offsets, word_starts); 161 terms.size(), url, terms_to_word_starts_offsets, word_starts);
155 const float frequency_score = GetFrequency( 162 const float frequency_score = GetFrequency(
156 now, (history_client && history_client->IsBookmarked(gurl)), visits); 163 now, (history_client && history_client->IsBookmarked(gurl)), visits);
157 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score); 164 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score,
165 *hqp_relevance_buckets_);
158 raw_score_ = 166 raw_score_ =
159 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; 167 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max;
160 168
161 if (also_do_hup_like_scoring_ && can_inline_) { 169 if (also_do_hup_like_scoring_ && can_inline_) {
162 // HistoryURL-provider-like scoring gives any match that is 170 // HistoryURL-provider-like scoring gives any match that is
163 // capable of being inlined a certain minimum score. Some of these 171 // capable of being inlined a certain minimum score. Some of these
164 // are given a higher score that lets them be shown in inline. 172 // are given a higher score that lets them be shown in inline.
165 // This test here derives from the test in 173 // This test here derives from the test in
166 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). 174 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().
167 const bool promote_to_inline = (row.typed_count() > 1) || 175 const bool promote_to_inline = (row.typed_count() > 1) ||
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after
431 // due to this test would look stupid if shown to the user. 439 // due to this test would look stupid if shown to the user.
432 if (term_scores[i] == 0) 440 if (term_scores[i] == 0)
433 return 0; 441 return 0;
434 topicality_score += raw_term_score_to_topicality_score_[ 442 topicality_score += raw_term_score_to_topicality_score_[
435 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) : 443 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) :
436 term_scores[i]]; 444 term_scores[i]];
437 } 445 }
438 // TODO(mpearson): If there are multiple terms, consider taking the 446 // TODO(mpearson): If there are multiple terms, consider taking the
439 // geometric mean of per-term scores rather than the arithmetic mean. 447 // geometric mean of per-term scores rather than the arithmetic mean.
440 448
441 return topicality_score / num_terms; 449 float final_topicality_score = topicality_score / num_terms;
450
451 // Demote all the URLs if the topicality score is less than threshold.
452 if (hqp_experimental_scoring_enabled_ &&
453 (final_topicality_score < topicality_threshold_)) {
454 return 0.0;
455 }
456
457 return final_topicality_score;
442 } 458 }
443 459
444 // static 460 // static
445 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() { 461 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() {
446 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) { 462 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) {
447 float topicality_score; 463 float topicality_score;
448 if (term_score < 10) { 464 if (term_score < 10) {
449 // If the term scores less than 10 points (no full-credit hit, or 465 // If the term scores less than 10 points (no full-credit hit, or
450 // no combination of hits that score that well), then the topicality 466 // no combination of hits that score that well), then the topicality
451 // score is linear in the term score. 467 // score is linear in the term score.
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
535 if (bookmarked) 551 if (bookmarked)
536 value_of_transition = std::max(value_of_transition, bookmark_value_); 552 value_of_transition = std::max(value_of_transition, bookmark_value_);
537 const float bucket_weight = 553 const float bucket_weight =
538 GetRecencyScore((now - visits[i].first).InDays()); 554 GetRecencyScore((now - visits[i].first).InDays());
539 summed_visit_points += (value_of_transition * bucket_weight); 555 summed_visit_points += (value_of_transition * bucket_weight);
540 } 556 }
541 return visits.size() * summed_visit_points / kMaxVisitsToScore; 557 return visits.size() * summed_visit_points / kMaxVisitsToScore;
542 } 558 }
543 559
544 // static 560 // static
545 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, 561 float ScoredHistoryMatch::GetFinalRelevancyScore(
546 float frequency_score) { 562 float topicality_score,
563 float frequency_score,
564 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) {
547 if (topicality_score == 0) 565 if (topicality_score == 0)
548 return 0; 566 return 0;
549 // Here's how to interpret intermediate_score: Suppose the omnibox 567 // Here's how to interpret intermediate_score: Suppose the omnibox
550 // has one input term. Suppose we have a URL for which the omnibox 568 // has one input term. Suppose we have a URL for which the omnibox
551 // input term has a single URL hostname hit at a word boundary. (This 569 // input term has a single URL hostname hit at a word boundary. (This
552 // implies topicality_score = 1.0.). Then the intermediate_score for 570 // implies topicality_score = 1.0.). Then the intermediate_score for
553 // this URL will depend entirely on the frequency_score with 571 // this URL will depend entirely on the frequency_score with
554 // this interpretation: 572 // this interpretation:
555 // - a single typed visit more than three months ago, no other visits -> 0.2 573 // - a single typed visit more than three months ago, no other visits -> 0.2
556 // - a visit every three days, no typed visits -> 0.706 574 // - a visit every three days, no typed visits -> 0.706
557 // - a visit every day, no typed visits -> 0.916 575 // - a visit every day, no typed visits -> 0.916
558 // - a single typed visit yesterday, no other visits -> 2.0 576 // - a single typed visit yesterday, no other visits -> 2.0
559 // - a typed visit once a week -> 11.77 577 // - a typed visit once a week -> 11.77
560 // - a typed visit every three days -> 14.12 578 // - a typed visit every three days -> 14.12
561 // - at least ten typed visits today -> 20.0 (maximum score) 579 // - at least ten typed visits today -> 20.0 (maximum score)
580 //
581 // The below code maps intermediate_score to the range [0, 1399].
582 // For example:
583 // HQP default scoring buckets: "1.5:600,12.0:1300,20.0:1399"
Mark P 2015/02/14 01:27:13 I think you'll want your format to require having
Ashok vardhan 2015/02/17 01:23:52 Make sense. Thought all the minimum scores for pro
584 // We will linearly interpolate the scores between:
585 // 0 to 1.5 --> 400 to 600
586 // 1.5 to 12.0 --> 600 to 1300
587 // 12.0 to 20.0 --> 1300 to 1399
588 // >= 20.0 --> 1399
589 //
590 // The score maxes out at 1400 (i.e., cannot beat a good inline result).
Mark P 2015/02/14 01:27:14 1400 -> 1399 also, good inline result -> good inli
Ashok vardhan 2015/02/17 01:23:52 Done.
591 //
592 // If experimental scoring is enabled, then the score buckets will be like:
Mark P 2015/02/14 01:27:14 I don't know what line 592-593 are adding. Is it
Ashok vardhan 2015/02/17 01:23:52 Done.
593 // HQP experimental scoring buckets: "1.5:600,5.0:900,12.0:1100,20.0:1300"
562 const float intermediate_score = topicality_score * frequency_score; 594 const float intermediate_score = topicality_score * frequency_score;
563 // The below code maps intermediate_score to the range [0, 1399]. 595
564 // The score maxes out at 1400 (i.e., cannot beat a good inline result). 596 double base_intermediate_score = 0.0;
Mark P 2015/02/14 01:27:14 Please comment the variables in this block more or
Ashok vardhan 2015/02/17 01:23:52 Acknowledged.
565 if (intermediate_score <= 1) { 597 int base_hqp_score = 400;
566 // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400 598 double max_intermediate_score = base_intermediate_score;
567 // and 1.5 has a score of 600. 599 int max_hqp_score = base_hqp_score;
568 const float slope = (600 - 400) / (1.5f - 0.0f); 600
569 return 400 + slope * intermediate_score; 601 // Find the threshold where intermediate score is greater than bucket.
602 for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) {
603 ScoreMaxRelevance hqp_bucket = hqp_relevance_buckets[i];
Mark P 2015/02/14 01:27:14 const &
Ashok vardhan 2015/02/17 01:23:52 Done.
604 max_intermediate_score = hqp_bucket.first;
605 max_hqp_score = hqp_bucket.second;
606 if (intermediate_score <= max_intermediate_score) {
607 const float slope = (
608 (max_hqp_score - base_hqp_score) /
609 (max_intermediate_score - base_intermediate_score));
610 const int final_hqp_score = (base_hqp_score +
Mark P 2015/02/14 01:27:13 does the whole right side of the equals fit on one
Mark P 2015/02/14 01:27:14 This function returns a float. Don't force it to
Ashok vardhan 2015/02/17 01:23:52 Nope. And also i guess its easy to read if it is l
Ashok vardhan 2015/02/17 01:23:52 Done.
Ashok vardhan 2015/02/17 01:23:52 Done.
611 (slope * (intermediate_score -
612 base_intermediate_score)));
613 return std::min(final_hqp_score, max_hqp_score);
Mark P 2015/02/14 01:27:13 Why is this min necessary?
Ashok vardhan 2015/02/17 01:23:52 Done.
614 }
615 base_intermediate_score = max_intermediate_score;
616 base_hqp_score = max_hqp_score;
570 } 617 }
571 if (intermediate_score <= 12.0) { 618 // It will reach this stage when the score is > highest bucket score or
572 // Linearly extrapolate up to 12 so 12 has a score of 1300. 619 // when buckets are not specified. Return max_hqp_score.
Mark P 2015/02/14 01:27:14 Buckets should never be not specified. Correct th
Ashok vardhan 2015/02/17 01:23:52 Done.
573 const float slope = (1300 - 600) / (12.0f - 1.5f); 620 return max_hqp_score;
574 return 600 + slope * (intermediate_score - 1.5);
575 }
576 // Linearly extrapolate so a score of 20 (or more) has a score of 1399.
577 // (Scores above 20 are possible for URLs that have multiple term hits
578 // in the URL and/or title and that are visited practically all
579 // the time using typed visits. We don't attempt to distinguish
580 // between these very good results.)
581 const float slope = (1399 - 1300) / (20.0f - 12.0f);
582 return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0));
583 } 621 }
584 622
623 // static
624 void ScoredHistoryMatch::InitializeHQPExperimentalParams() {
625 // These are default HQP relevance scoring buckets.
626 // See GetFinalRelevancyScore() for details.
627 std::string hqp_relevance_buckets_str = "1.5:600,12.0:1300,20.0:1399";
628
629 // Fetch the experiment params if they are any.
630 hqp_experimental_scoring_enabled_ =
631 OmniboxFieldTrial::HQPExperimentalScoringEnabled();
632
633 if (hqp_experimental_scoring_enabled_) {
634 // Add the topicality threshold from experiment params.
635 float hqp_experimental_topicality_threhold =
636 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();
637 if (hqp_experimental_topicality_threhold > 0)
Mark P 2015/02/14 01:27:14 Why have a >0 test? If you're checking for unspec
Ashok vardhan 2015/02/17 01:23:52 Done.
638 topicality_threshold_ = hqp_experimental_topicality_threhold;
639
640 // Add the HQP experimental scoring buckets.
641 std::string hqp_experimental_scoring_buckets =
642 OmniboxFieldTrial::HQPExperimentalScoringBuckets();
643 if (!hqp_experimental_scoring_buckets.empty())
644 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets;
645 }
646
647 // Parse the hqp_relevance_buckets_str string once and store them in vector
648 // which is easy to access.
649 hqp_relevance_buckets_ =
650 new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>();
651 base::StringPairs kv_pairs;
652 if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str,
653 ':', ',', &kv_pairs)) {
654 for (base::StringPairs::const_iterator it = kv_pairs.begin();
655 it != kv_pairs.end(); ++it) {
656 ScoreMaxRelevance bucket;
657 base::StringToDouble(it->first, &bucket.first);
Mark P 2015/02/14 01:27:14 minor nit: consider DCHECKING the return value for
Ashok vardhan 2015/02/17 01:23:52 DCHECK seems to be not working here. std::strin
658 base::StringToInt(it->second, &bucket.second);
659 hqp_relevance_buckets_->push_back(bucket);
660 }
661 }
662 }
663
664 // static
585 void ScoredHistoryMatch::Init() { 665 void ScoredHistoryMatch::Init() {
586 if (initialized_) 666 if (initialized_)
587 return; 667 return;
588 also_do_hup_like_scoring_ = false; 668 also_do_hup_like_scoring_ = false;
589 // When doing HUP-like scoring, don't allow a non-inlineable match 669 // When doing HUP-like scoring, don't allow a non-inlineable match
590 // to beat the score of good inlineable matches. This is a problem 670 // to beat the score of good inlineable matches. This is a problem
591 // because if a non-inlineable match ends up with the highest score 671 // because if a non-inlineable match ends up with the highest score
592 // from HistoryQuick provider, all HistoryQuick matches get demoted 672 // from HistoryQuick provider, all HistoryQuick matches get demoted
593 // to non-inlineable scores (scores less than 1200). Without 673 // to non-inlineable scores (scores less than 1200). Without
594 // HUP-like-scoring, these results would actually come from the HUP 674 // HUP-like-scoring, these results would actually come from the HUP
595 // and not be demoted, thus outscoring the demoted HQP results. 675 // and not be demoted, thus outscoring the demoted HQP results.
596 // When the HQP provides these, we need to clamp the non-inlineable 676 // When the HQP provides these, we need to clamp the non-inlineable
597 // results to preserve this behavior. 677 // results to preserve this behavior.
598 if (also_do_hup_like_scoring_) { 678 if (also_do_hup_like_scoring_) {
599 max_assigned_score_for_non_inlineable_matches_ = 679 max_assigned_score_for_non_inlineable_matches_ =
600 HistoryURLProvider::kScoreForBestInlineableResult - 1; 680 HistoryURLProvider::kScoreForBestInlineableResult - 1;
601 } 681 }
602 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); 682 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();
603 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); 683 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();
604 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); 684 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();
685
686 // Initialize the HQP Experimental scoring params.
687 InitializeHQPExperimentalParams();
688
605 initialized_ = true; 689 initialized_ = true;
606 } 690 }
607 691
608 } // namespace history 692 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698