Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1480)

Side by Side Diff: chrome/browser/history/scored_history_match.cc

Issue 905023003: Adding knobs on HQP provider. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressing mark comments. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/scored_history_match.h" 5 #include "chrome/browser/history/scored_history_match.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <numeric> 10 #include <numeric>
11 #include <set> 11 #include <set>
12 12
13 #include <math.h> 13 #include <math.h>
14 14
15 #include "base/logging.h" 15 #include "base/logging.h"
16 #include "base/metrics/histogram.h" 16 #include "base/metrics/histogram.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_split.h"
17 #include "base/strings/string_util.h" 19 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h" 20 #include "base/strings/utf_string_conversions.h"
19 #include "chrome/browser/autocomplete/history_url_provider.h" 21 #include "chrome/browser/autocomplete/history_url_provider.h"
20 #include "components/bookmarks/browser/bookmark_utils.h" 22 #include "components/bookmarks/browser/bookmark_utils.h"
21 #include "components/history/core/browser/history_client.h" 23 #include "components/history/core/browser/history_client.h"
22 #include "components/omnibox/omnibox_field_trial.h" 24 #include "components/omnibox/omnibox_field_trial.h"
23 #include "components/omnibox/url_prefix.h" 25 #include "components/omnibox/url_prefix.h"
24 #include "content/public/browser/browser_thread.h" 26 #include "content/public/browser/browser_thread.h"
25 27
26 namespace history { 28 namespace history {
27 29
28 // ScoredHistoryMatch ---------------------------------------------------------- 30 // ScoredHistoryMatch ----------------------------------------------------------
29 31
30 // static 32 // static
31 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10; 33 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10;
32 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366; 34 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366;
33 const int ScoredHistoryMatch::kMaxRawTermScore = 30; 35 const int ScoredHistoryMatch::kMaxRawTermScore = 30;
34 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL; 36 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL;
35 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL; 37 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL;
36 bool ScoredHistoryMatch::initialized_ = false; 38 bool ScoredHistoryMatch::initialized_ = false;
37 int ScoredHistoryMatch::bookmark_value_ = 1; 39 int ScoredHistoryMatch::bookmark_value_ = 1;
38 bool ScoredHistoryMatch::allow_tld_matches_ = false; 40 bool ScoredHistoryMatch::allow_tld_matches_ = false;
39 bool ScoredHistoryMatch::allow_scheme_matches_ = false; 41 bool ScoredHistoryMatch::allow_scheme_matches_ = false;
40 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; 42 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false;
41 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; 43 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1;
44 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_ = false;
45 float ScoredHistoryMatch::topicality_threshold_ = -1;
46 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>*
47 ScoredHistoryMatch::hqp_relevance_buckets_ = NULL;
42 48
43 ScoredHistoryMatch::ScoredHistoryMatch() 49 ScoredHistoryMatch::ScoredHistoryMatch()
44 : raw_score_(0), 50 : raw_score_(0),
45 can_inline_(false) { 51 can_inline_(false) {
46 Init(); 52 Init();
47 } 53 }
48 54
49 ScoredHistoryMatch::ScoredHistoryMatch( 55 ScoredHistoryMatch::ScoredHistoryMatch(
50 const URLRow& row, 56 const URLRow& row,
51 const VisitInfoVector& visits, 57 const VisitInfoVector& visits,
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 const int num_components_in_best_inlineable_prefix = 153 const int num_components_in_best_inlineable_prefix =
148 best_inlineable_prefix->num_components; 154 best_inlineable_prefix->num_components;
149 innermost_match = (num_components_in_best_inlineable_prefix == 155 innermost_match = (num_components_in_best_inlineable_prefix ==
150 num_components_in_best_prefix); 156 num_components_in_best_prefix);
151 } 157 }
152 158
153 const float topicality_score = GetTopicalityScore( 159 const float topicality_score = GetTopicalityScore(
154 terms.size(), url, terms_to_word_starts_offsets, word_starts); 160 terms.size(), url, terms_to_word_starts_offsets, word_starts);
155 const float frequency_score = GetFrequency( 161 const float frequency_score = GetFrequency(
156 now, (history_client && history_client->IsBookmarked(gurl)), visits); 162 now, (history_client && history_client->IsBookmarked(gurl)), visits);
157 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score); 163 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score,
164 *hqp_relevance_buckets_);
158 raw_score_ = 165 raw_score_ =
159 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; 166 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max;
160 167
161 if (also_do_hup_like_scoring_ && can_inline_) { 168 if (also_do_hup_like_scoring_ && can_inline_) {
162 // HistoryURL-provider-like scoring gives any match that is 169 // HistoryURL-provider-like scoring gives any match that is
163 // capable of being inlined a certain minimum score. Some of these 170 // capable of being inlined a certain minimum score. Some of these
164 // are given a higher score that lets them be shown in inline. 171 // are given a higher score that lets them be shown in inline.
165 // This test here derives from the test in 172 // This test here derives from the test in
166 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). 173 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().
167 const bool promote_to_inline = (row.typed_count() > 1) || 174 const bool promote_to_inline = (row.typed_count() > 1) ||
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after
431 // due to this test would look stupid if shown to the user. 438 // due to this test would look stupid if shown to the user.
432 if (term_scores[i] == 0) 439 if (term_scores[i] == 0)
433 return 0; 440 return 0;
434 topicality_score += raw_term_score_to_topicality_score_[ 441 topicality_score += raw_term_score_to_topicality_score_[
435 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) : 442 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) :
436 term_scores[i]]; 443 term_scores[i]];
437 } 444 }
438 // TODO(mpearson): If there are multiple terms, consider taking the 445 // TODO(mpearson): If there are multiple terms, consider taking the
439 // geometric mean of per-term scores rather than the arithmetic mean. 446 // geometric mean of per-term scores rather than the arithmetic mean.
440 447
441 return topicality_score / num_terms; 448 float final_topicality_score = topicality_score / num_terms;
449
450 // Demote all the URLs if the topicality score is less than threshold.
451 if (hqp_experimental_scoring_enabled_ &&
452 (final_topicality_score < topicality_threshold_)) {
453 return 0.0;
454 }
455
456 return final_topicality_score;
442 } 457 }
443 458
444 // static 459 // static
445 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() { 460 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() {
446 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) { 461 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) {
447 float topicality_score; 462 float topicality_score;
448 if (term_score < 10) { 463 if (term_score < 10) {
449 // If the term scores less than 10 points (no full-credit hit, or 464 // If the term scores less than 10 points (no full-credit hit, or
450 // no combination of hits that score that well), then the topicality 465 // no combination of hits that score that well), then the topicality
451 // score is linear in the term score. 466 // score is linear in the term score.
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
535 if (bookmarked) 550 if (bookmarked)
536 value_of_transition = std::max(value_of_transition, bookmark_value_); 551 value_of_transition = std::max(value_of_transition, bookmark_value_);
537 const float bucket_weight = 552 const float bucket_weight =
538 GetRecencyScore((now - visits[i].first).InDays()); 553 GetRecencyScore((now - visits[i].first).InDays());
539 summed_visit_points += (value_of_transition * bucket_weight); 554 summed_visit_points += (value_of_transition * bucket_weight);
540 } 555 }
541 return visits.size() * summed_visit_points / kMaxVisitsToScore; 556 return visits.size() * summed_visit_points / kMaxVisitsToScore;
542 } 557 }
543 558
544 // static 559 // static
545 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, 560 float ScoredHistoryMatch::GetFinalRelevancyScore(
546 float frequency_score) { 561 float topicality_score,
562 float frequency_score,
563 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) {
564 DCHECK(hqp_relevance_buckets.size() > 0);
565
547 if (topicality_score == 0) 566 if (topicality_score == 0)
548 return 0; 567 return 0;
549 // Here's how to interpret intermediate_score: Suppose the omnibox 568 // Here's how to interpret intermediate_score: Suppose the omnibox
550 // has one input term. Suppose we have a URL for which the omnibox 569 // has one input term. Suppose we have a URL for which the omnibox
551 // input term has a single URL hostname hit at a word boundary. (This 570 // input term has a single URL hostname hit at a word boundary. (This
552 // implies topicality_score = 1.0.). Then the intermediate_score for 571 // implies topicality_score = 1.0.). Then the intermediate_score for
553 // this URL will depend entirely on the frequency_score with 572 // this URL will depend entirely on the frequency_score with
554 // this interpretation: 573 // this interpretation:
555 // - a single typed visit more than three months ago, no other visits -> 0.2 574 // - a single typed visit more than three months ago, no other visits -> 0.2
556 // - a visit every three days, no typed visits -> 0.706 575 // - a visit every three days, no typed visits -> 0.706
557 // - a visit every day, no typed visits -> 0.916 576 // - a visit every day, no typed visits -> 0.916
558 // - a single typed visit yesterday, no other visits -> 2.0 577 // - a single typed visit yesterday, no other visits -> 2.0
559 // - a typed visit once a week -> 11.77 578 // - a typed visit once a week -> 11.77
560 // - a typed visit every three days -> 14.12 579 // - a typed visit every three days -> 14.12
561 // - at least ten typed visits today -> 20.0 (maximum score) 580 // - at least ten typed visits today -> 20.0 (maximum score)
581 //
582 // The below code maps intermediate_score to the range [0, 1399].
583 // For example:
584 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"
585 // We will linearly interpolate the scores between:
586 // 0 to 1.5 --> 400 to 600
587 // 1.5 to 12.0 --> 600 to 1300
588 // 12.0 to 20.0 --> 1300 to 1399
589 // >= 20.0 --> 1399
590 //
591 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result
592 // from HistoryURL provider)
Mark P 2015/02/18 00:03:32 nit: period.
Ashok vardhan 2015/02/18 01:11:21 Done.
562 const float intermediate_score = topicality_score * frequency_score; 593 const float intermediate_score = topicality_score * frequency_score;
563 // The below code maps intermediate_score to the range [0, 1399]. 594
564 // The score maxes out at 1400 (i.e., cannot beat a good inline result). 595 double min_intermediate_score;
Mark P 2015/02/18 00:03:32 If intermediate_score == 0, you end up using this
Ashok vardhan 2015/02/18 01:11:21 Yes, i have added ">=" after refactoring. You can
565 if (intermediate_score <= 1) { 596 float min_hqp_score;
566 // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400 597 // Find the threshold where intermediate score is greater than bucket.
567 // and 1.5 has a score of 600. 598 for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) {
568 const float slope = (600 - 400) / (1.5f - 0.0f); 599 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i];
569 return 400 + slope * intermediate_score; 600 if (intermediate_score > hqp_bucket.first) {
601 min_intermediate_score = hqp_bucket.first;
602 min_hqp_score = hqp_bucket.second;
603 } else {
604 const float slope = ((hqp_bucket.second - min_hqp_score) /
605 (hqp_bucket.first - min_intermediate_score));
606 const float final_hqp_score = (min_hqp_score +
607 (slope * (intermediate_score -
608 min_intermediate_score)));
609 return final_hqp_score;
Mark P 2015/02/18 00:03:32 nit: simply return final_hqp_score; no need to use
Ashok vardhan 2015/02/18 01:11:21 Done.
610 }
570 } 611 }
571 if (intermediate_score <= 12.0) { 612 // It will reach this stage when the score is > highest bucket score.
572 // Linearly extrapolate up to 12 so 12 has a score of 1300. 613 return min_hqp_score;
573 const float slope = (1300 - 600) / (12.0f - 1.5f);
574 return 600 + slope * (intermediate_score - 1.5);
575 }
576 // Linearly extrapolate so a score of 20 (or more) has a score of 1399.
577 // (Scores above 20 are possible for URLs that have multiple term hits
578 // in the URL and/or title and that are visited practically all
579 // the time using typed visits. We don't attempt to distinguish
580 // between these very good results.)
581 const float slope = (1399 - 1300) / (20.0f - 12.0f);
582 return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0));
583 } 614 }
584 615
616 // static
617 void ScoredHistoryMatch::InitializeHQPExperimentalParams() {
618 // These are default HQP relevance scoring buckets.
619 // See GetFinalRelevancyScore() for details.
620 std::string hqp_relevance_buckets_str = "0.0:400,1.5:600,12.0:1300,20.0:1399";
621
622 // Fetch the experiment params if they are any.
623 hqp_experimental_scoring_enabled_ =
624 OmniboxFieldTrial::HQPExperimentalScoringEnabled();
625
626 if (hqp_experimental_scoring_enabled_) {
627 // Add the topicality threshold from experiment params.
628 float hqp_experimental_topicality_threhold =
629 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();
630 topicality_threshold_ = hqp_experimental_topicality_threhold;
631
632 // Add the HQP experimental scoring buckets.
633 std::string hqp_experimental_scoring_buckets =
634 OmniboxFieldTrial::HQPExperimentalScoringBuckets();
635 if (!hqp_experimental_scoring_buckets.empty())
636 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets;
637 }
638
639 // Parse the hqp_relevance_buckets_str string once and store them in vector
640 // which is easy to access.
641 hqp_relevance_buckets_ =
642 new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>();
643 base::StringPairs kv_pairs;
644 if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str,
645 ':', ',', &kv_pairs)) {
646 for (base::StringPairs::const_iterator it = kv_pairs.begin();
647 it != kv_pairs.end(); ++it) {
648 ScoreMaxRelevance bucket;
649 base::StringToDouble(it->first, &bucket.first);
Mark P 2015/02/18 00:03:32 DCHECKS are not executed in non-debug builds, mean
Ashok vardhan 2015/02/18 01:11:21 I see, thanks.!
650 base::StringToInt(it->second, &bucket.second);
651 hqp_relevance_buckets_->push_back(bucket);
652 }
653 }
654 }
655
656 // static
585 void ScoredHistoryMatch::Init() { 657 void ScoredHistoryMatch::Init() {
586 if (initialized_) 658 if (initialized_)
587 return; 659 return;
588 also_do_hup_like_scoring_ = false; 660 also_do_hup_like_scoring_ = false;
589 // When doing HUP-like scoring, don't allow a non-inlineable match 661 // When doing HUP-like scoring, don't allow a non-inlineable match
590 // to beat the score of good inlineable matches. This is a problem 662 // to beat the score of good inlineable matches. This is a problem
591 // because if a non-inlineable match ends up with the highest score 663 // because if a non-inlineable match ends up with the highest score
592 // from HistoryQuick provider, all HistoryQuick matches get demoted 664 // from HistoryQuick provider, all HistoryQuick matches get demoted
593 // to non-inlineable scores (scores less than 1200). Without 665 // to non-inlineable scores (scores less than 1200). Without
594 // HUP-like-scoring, these results would actually come from the HUP 666 // HUP-like-scoring, these results would actually come from the HUP
595 // and not be demoted, thus outscoring the demoted HQP results. 667 // and not be demoted, thus outscoring the demoted HQP results.
596 // When the HQP provides these, we need to clamp the non-inlineable 668 // When the HQP provides these, we need to clamp the non-inlineable
597 // results to preserve this behavior. 669 // results to preserve this behavior.
598 if (also_do_hup_like_scoring_) { 670 if (also_do_hup_like_scoring_) {
599 max_assigned_score_for_non_inlineable_matches_ = 671 max_assigned_score_for_non_inlineable_matches_ =
600 HistoryURLProvider::kScoreForBestInlineableResult - 1; 672 HistoryURLProvider::kScoreForBestInlineableResult - 1;
601 } 673 }
602 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); 674 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();
603 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); 675 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();
604 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); 676 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();
677
678 // Initialize the HQP Experimental scoring params.
679 InitializeHQPExperimentalParams();
680
605 initialized_ = true; 681 initialized_ = true;
606 } 682 }
607 683
608 } // namespace history 684 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698