Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(401)

Side by Side Diff: chrome/browser/history/scored_history_match.cc

Issue 905023003: Adding knobs on HQP provider. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressing bart comments. PTAL. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/scored_history_match.h" 5 #include "chrome/browser/history/scored_history_match.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <numeric> 10 #include <numeric>
11 #include <set> 11 #include <set>
12 12
13 #include <math.h> 13 #include <math.h>
14 14
15 #include "base/logging.h" 15 #include "base/logging.h"
16 #include "base/metrics/histogram.h" 16 #include "base/metrics/histogram.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_split.h"
17 #include "base/strings/string_util.h" 19 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h" 20 #include "base/strings/utf_string_conversions.h"
19 #include "chrome/browser/autocomplete/history_url_provider.h" 21 #include "chrome/browser/autocomplete/history_url_provider.h"
20 #include "components/bookmarks/browser/bookmark_utils.h" 22 #include "components/bookmarks/browser/bookmark_utils.h"
21 #include "components/history/core/browser/history_client.h" 23 #include "components/history/core/browser/history_client.h"
22 #include "components/omnibox/omnibox_field_trial.h" 24 #include "components/omnibox/omnibox_field_trial.h"
23 #include "components/omnibox/url_prefix.h" 25 #include "components/omnibox/url_prefix.h"
24 #include "content/public/browser/browser_thread.h" 26 #include "content/public/browser/browser_thread.h"
25 27
26 namespace history { 28 namespace history {
(...skipping 27 matching lines...) Expand all
54 const String16Vector& terms, 56 const String16Vector& terms,
55 const WordStarts& terms_to_word_starts_offsets, 57 const WordStarts& terms_to_word_starts_offsets,
56 const RowWordStarts& word_starts, 58 const RowWordStarts& word_starts,
57 const base::Time now, 59 const base::Time now,
58 HistoryClient* history_client) 60 HistoryClient* history_client)
59 : HistoryMatch(row, 0, false, false), 61 : HistoryMatch(row, 0, false, false),
60 raw_score_(0), 62 raw_score_(0),
61 can_inline_(false) { 63 can_inline_(false) {
62 Init(); 64 Init();
63 65
66 // Initialize the HQP scoring params.
67 InitializeHQPExperimentalParams();
68
64 GURL gurl = row.url(); 69 GURL gurl = row.url();
65 if (!gurl.is_valid()) 70 if (!gurl.is_valid())
66 return; 71 return;
67 72
68 // Figure out where each search term appears in the URL and/or page title 73 // Figure out where each search term appears in the URL and/or page title
69 // so that we can score as well as provide autocomplete highlighting. 74 // so that we can score as well as provide autocomplete highlighting.
70 base::OffsetAdjuster::Adjustments adjustments; 75 base::OffsetAdjuster::Adjustments adjustments;
71 base::string16 url = 76 base::string16 url =
72 bookmarks::CleanUpUrlForMatching(gurl, languages, &adjustments); 77 bookmarks::CleanUpUrlForMatching(gurl, languages, &adjustments);
73 base::string16 title = bookmarks::CleanUpTitleForMatching(row.title()); 78 base::string16 title = bookmarks::CleanUpTitleForMatching(row.title());
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 const int num_components_in_best_inlineable_prefix = 152 const int num_components_in_best_inlineable_prefix =
148 best_inlineable_prefix->num_components; 153 best_inlineable_prefix->num_components;
149 innermost_match = (num_components_in_best_inlineable_prefix == 154 innermost_match = (num_components_in_best_inlineable_prefix ==
150 num_components_in_best_prefix); 155 num_components_in_best_prefix);
151 } 156 }
152 157
153 const float topicality_score = GetTopicalityScore( 158 const float topicality_score = GetTopicalityScore(
154 terms.size(), url, terms_to_word_starts_offsets, word_starts); 159 terms.size(), url, terms_to_word_starts_offsets, word_starts);
155 const float frequency_score = GetFrequency( 160 const float frequency_score = GetFrequency(
156 now, (history_client && history_client->IsBookmarked(gurl)), visits); 161 now, (history_client && history_client->IsBookmarked(gurl)), visits);
157 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score); 162 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score,
163 hqp_relevance_buckets_);
158 raw_score_ = 164 raw_score_ =
159 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; 165 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max;
160 166
161 if (also_do_hup_like_scoring_ && can_inline_) { 167 if (also_do_hup_like_scoring_ && can_inline_) {
162 // HistoryURL-provider-like scoring gives any match that is 168 // HistoryURL-provider-like scoring gives any match that is
163 // capable of being inlined a certain minimum score. Some of these 169 // capable of being inlined a certain minimum score. Some of these
164 // are given a higher score that lets them be shown in inline. 170 // are given a higher score that lets them be shown in inline.
165 // This test here derives from the test in 171 // This test here derives from the test in
166 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). 172 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().
167 const bool promote_to_inline = (row.typed_count() > 1) || 173 const bool promote_to_inline = (row.typed_count() > 1) ||
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after
431 // due to this test would look stupid if shown to the user. 437 // due to this test would look stupid if shown to the user.
432 if (term_scores[i] == 0) 438 if (term_scores[i] == 0)
433 return 0; 439 return 0;
434 topicality_score += raw_term_score_to_topicality_score_[ 440 topicality_score += raw_term_score_to_topicality_score_[
435 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) : 441 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) :
436 term_scores[i]]; 442 term_scores[i]];
437 } 443 }
438 // TODO(mpearson): If there are multiple terms, consider taking the 444 // TODO(mpearson): If there are multiple terms, consider taking the
439 // geometric mean of per-term scores rather than the arithmetic mean. 445 // geometric mean of per-term scores rather than the arithmetic mean.
440 446
441 return topicality_score / num_terms; 447 float final_topicality_score = topicality_score / num_terms;
448
449 // Demote all the URLs if the topicality score is less than the threshold.
450 if (hqp_experimental_scoring_enabled_ &&
451 (final_topicality_score < topicality_threshold_)) {
452 return 0.0;
453 }
454
455 return final_topicality_score;
442 } 456 }
443 457
444 // static 458 // static
445 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() { 459 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() {
446 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) { 460 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) {
447 float topicality_score; 461 float topicality_score;
448 if (term_score < 10) { 462 if (term_score < 10) {
449 // If the term scores less than 10 points (no full-credit hit, or 463 // If the term scores less than 10 points (no full-credit hit, or
450 // no combination of hits that score that well), then the topicality 464 // no combination of hits that score that well), then the topicality
451 // score is linear in the term score. 465 // score is linear in the term score.
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
535 if (bookmarked) 549 if (bookmarked)
536 value_of_transition = std::max(value_of_transition, bookmark_value_); 550 value_of_transition = std::max(value_of_transition, bookmark_value_);
537 const float bucket_weight = 551 const float bucket_weight =
538 GetRecencyScore((now - visits[i].first).InDays()); 552 GetRecencyScore((now - visits[i].first).InDays());
539 summed_visit_points += (value_of_transition * bucket_weight); 553 summed_visit_points += (value_of_transition * bucket_weight);
540 } 554 }
541 return visits.size() * summed_visit_points / kMaxVisitsToScore; 555 return visits.size() * summed_visit_points / kMaxVisitsToScore;
542 } 556 }
543 557
544 // static 558 // static
545 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, 559 float ScoredHistoryMatch::GetFinalRelevancyScore(
546 float frequency_score) { 560 float topicality_score,
561 float frequency_score,
562 std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) {
547 if (topicality_score == 0) 563 if (topicality_score == 0)
548 return 0; 564 return 0;
549 // Here's how to interpret intermediate_score: Suppose the omnibox 565 // Here's how to interpret intermediate_score: Suppose the omnibox
550 // has one input term. Suppose we have a URL for which the omnibox 566 // has one input term. Suppose we have a URL for which the omnibox
551 // input term has a single URL hostname hit at a word boundary. (This 567 // input term has a single URL hostname hit at a word boundary. (This
552 // implies topicality_score = 1.0.). Then the intermediate_score for 568 // implies topicality_score = 1.0.). Then the intermediate_score for
553 // this URL will depend entirely on the frequency_score with 569 // this URL will depend entirely on the frequency_score with
554 // this interpretation: 570 // this interpretation:
555 // - a single typed visit more than three months ago, no other visits -> 0.2 571 // - a single typed visit more than three months ago, no other visits -> 0.2
556 // - a visit every three days, no typed visits -> 0.706 572 // - a visit every three days, no typed visits -> 0.706
557 // - a visit every day, no typed visits -> 0.916 573 // - a visit every day, no typed visits -> 0.916
558 // - a single typed visit yesterday, no other visits -> 2.0 574 // - a single typed visit yesterday, no other visits -> 2.0
559 // - a typed visit once a week -> 11.77 575 // - a typed visit once a week -> 11.77
560 // - a typed visit every three days -> 14.12 576 // - a typed visit every three days -> 14.12
561 // - at least ten typed visits today -> 20.0 (maximum score) 577 // - at least ten typed visits today -> 20.0 (maximum score)
578 //
579 // The below code maps intermediate_score to the range [0, 1399].
580 // For example:
581 // HQP default scoring buckets: "1.5:600,12.0:1300,20.0:1399"
582 // We will linearly interpolate the scores between:
583 // 0 to 1.5 --> 400 to 600
584 // 1.5 to 12.0 --> 600 to 1300
585 // 12.0 to 20.0 --> 1300 to 1399
586 // >= 20.0 --> 1399
587 //
588 // The score maxes out at 1400 (i.e., cannot beat a good inline result).
589 //
590 // If experimental scoring is enabled, then the score buckets will be like:
591 // HQP experimental scoring buckets: "1.5:600,5.0:900,12.0:1100,20.0:1300"
562 const float intermediate_score = topicality_score * frequency_score; 592 const float intermediate_score = topicality_score * frequency_score;
563 // The below code maps intermediate_score to the range [0, 1399]. 593
564 // The score maxes out at 1400 (i.e., cannot beat a good inline result). 594 double base_intermediate_score = 0.0;
565 if (intermediate_score <= 1) { 595 int base_hqp_score = 400;
566 // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400 596 double max_intermediate_score = base_intermediate_score;
567 // and 1.5 has a score of 600. 597 int max_hqp_score = base_hqp_score;
568 const float slope = (600 - 400) / (1.5f - 0.0f); 598
569 return 400 + slope * intermediate_score; 599 // Find the threshold where intermediate score is greater than bucket.
600 for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) {
601 ScoreMaxRelevance hqp_bucket = hqp_relevance_buckets[i];
602 max_intermediate_score = hqp_bucket.first;
603 max_hqp_score = hqp_bucket.second;
604 if (intermediate_score <= max_intermediate_score) {
605 const float slope = (
606 (max_hqp_score - base_hqp_score) /
607 (max_intermediate_score - base_intermediate_score));
608 const int final_hqp_score = (base_hqp_score +
609 (slope * (intermediate_score -
610 base_intermediate_score)));
611 return std::min(final_hqp_score, max_hqp_score);
612 }
613 base_intermediate_score = max_intermediate_score;
614 base_hqp_score = max_hqp_score;
570 } 615 }
571 if (intermediate_score <= 12.0) { 616 // It will reach this stage when the score is > highest bucket score or
572 // Linearly extrapolate up to 12 so 12 has a score of 1300. 617 // when buckets are not specified. Return max_hqp_score.
573 const float slope = (1300 - 600) / (12.0f - 1.5f); 618 return max_hqp_score;
574 return 600 + slope * (intermediate_score - 1.5);
575 }
576 // Linearly extrapolate so a score of 20 (or more) has a score of 1399.
577 // (Scores above 20 are possible for URLs that have multiple term hits
578 // in the URL and/or title and that are visited practically all
579 // the time using typed visits. We don't attempt to distinguish
580 // between these very good results.)
581 const float slope = (1399 - 1300) / (20.0f - 12.0f);
582 return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0));
583 } 619 }
584 620
621 void ScoredHistoryMatch::InitializeHQPExperimentalParams() {
622 // Initialize the hqp experiment params.
623 hqp_experimental_scoring_enabled_ = false;
624 topicality_threshold_ = -1;
625 // These are default HQP scoring params.
626 // See GetFinalRelevancyScore() for details.
627 std::string hqp_relevance_buckets_str = "1.5:600,12.0:1300,20.0:1399";
628
629 // Fetch the experiment params if there are any.
630 hqp_experimental_scoring_enabled_ =
631 OmniboxFieldTrial::HQPExperimentalScoringEnabled();
632
633 if (hqp_experimental_scoring_enabled_) {
634 // Add the topicality threshold from experiment params.
635 float hqp_experimental_topicality_threhold =
636 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();
637 if (hqp_experimental_topicality_threhold > 0)
638 topicality_threshold_ = hqp_experimental_topicality_threhold;
Bart N. 2015/02/11 01:30:15 Extra space after =.
Ashok vardhan 2015/02/11 19:06:54 Done.
639
640 // Add the HQP experimental scoring buckets.
641 std::string hqp_experimental_scoring_buckets =
642 OmniboxFieldTrial::HQPExperimentalScoringBuckets();
643 if (!hqp_experimental_scoring_buckets.empty()) {
Bart N. 2015/02/11 01:30:15 You should be consistent with how you use {} aroun
Ashok vardhan 2015/02/11 19:06:54 Done.
644 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets;
645 }
646 }
647
648 // Parse the hqp_relevance_buckets_str string once and store the buckets in
649 // a vector, which is easy to access.
650 base::StringPairs kv_pairs;
651 if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str,
652 ':', ',', &kv_pairs)) {
653 for (base::StringPairs::const_iterator it = kv_pairs.begin();
654 it != kv_pairs.end(); ++it) {
655 ScoreMaxRelevance bucket;
656 base::StringToDouble(it->first, &bucket.first);
657 base::StringToInt(it->second, &bucket.second);
658 hqp_relevance_buckets_.push_back(bucket);
659 }
660 }
661 }
662
663 // static
585 void ScoredHistoryMatch::Init() { 664 void ScoredHistoryMatch::Init() {
586 if (initialized_) 665 if (initialized_)
587 return; 666 return;
588 also_do_hup_like_scoring_ = false; 667 also_do_hup_like_scoring_ = false;
589 // When doing HUP-like scoring, don't allow a non-inlineable match 668 // When doing HUP-like scoring, don't allow a non-inlineable match
590 // to beat the score of good inlineable matches. This is a problem 669 // to beat the score of good inlineable matches. This is a problem
591 // because if a non-inlineable match ends up with the highest score 670 // because if a non-inlineable match ends up with the highest score
592 // from HistoryQuick provider, all HistoryQuick matches get demoted 671 // from HistoryQuick provider, all HistoryQuick matches get demoted
593 // to non-inlineable scores (scores less than 1200). Without 672 // to non-inlineable scores (scores less than 1200). Without
594 // HUP-like-scoring, these results would actually come from the HUP 673 // HUP-like-scoring, these results would actually come from the HUP
595 // and not be demoted, thus outscoring the demoted HQP results. 674 // and not be demoted, thus outscoring the demoted HQP results.
596 // When the HQP provides these, we need to clamp the non-inlineable 675 // When the HQP provides these, we need to clamp the non-inlineable
597 // results to preserve this behavior. 676 // results to preserve this behavior.
598 if (also_do_hup_like_scoring_) { 677 if (also_do_hup_like_scoring_) {
599 max_assigned_score_for_non_inlineable_matches_ = 678 max_assigned_score_for_non_inlineable_matches_ =
600 HistoryURLProvider::kScoreForBestInlineableResult - 1; 679 HistoryURLProvider::kScoreForBestInlineableResult - 1;
601 } 680 }
602 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); 681 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();
603 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); 682 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();
604 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); 683 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();
684
605 initialized_ = true; 685 initialized_ = true;
606 } 686 }
607 687
608 } // namespace history 688 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/scored_history_match.h ('k') | chrome/browser/history/scored_history_match_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698