OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/scored_history_match.h" | 5 #include "chrome/browser/history/scored_history_match.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <numeric> | 10 #include <numeric> |
11 #include <set> | 11 #include <set> |
12 | 12 |
13 #include <math.h> | 13 #include <math.h> |
14 | 14 |
15 #include "base/logging.h" | 15 #include "base/logging.h" |
16 #include "base/metrics/histogram.h" | 16 #include "base/metrics/histogram.h" |
17 #include "base/strings/string_number_conversions.h" | |
18 #include "base/strings/string_split.h" | |
17 #include "base/strings/string_util.h" | 19 #include "base/strings/string_util.h" |
18 #include "base/strings/utf_string_conversions.h" | 20 #include "base/strings/utf_string_conversions.h" |
19 #include "chrome/browser/autocomplete/history_url_provider.h" | 21 #include "chrome/browser/autocomplete/history_url_provider.h" |
20 #include "components/bookmarks/browser/bookmark_utils.h" | 22 #include "components/bookmarks/browser/bookmark_utils.h" |
21 #include "components/history/core/browser/history_client.h" | 23 #include "components/history/core/browser/history_client.h" |
22 #include "components/omnibox/omnibox_field_trial.h" | 24 #include "components/omnibox/omnibox_field_trial.h" |
23 #include "components/omnibox/url_prefix.h" | 25 #include "components/omnibox/url_prefix.h" |
24 #include "content/public/browser/browser_thread.h" | 26 #include "content/public/browser/browser_thread.h" |
25 | 27 |
26 namespace history { | 28 namespace history { |
27 | 29 |
28 // ScoredHistoryMatch ---------------------------------------------------------- | 30 // ScoredHistoryMatch ---------------------------------------------------------- |
29 | 31 |
30 // static | 32 // static |
31 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10; | 33 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10; |
32 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366; | 34 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366; |
33 const int ScoredHistoryMatch::kMaxRawTermScore = 30; | 35 const int ScoredHistoryMatch::kMaxRawTermScore = 30; |
34 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL; | 36 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL; |
35 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL; | 37 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL; |
36 bool ScoredHistoryMatch::initialized_ = false; | 38 bool ScoredHistoryMatch::initialized_ = false; |
37 int ScoredHistoryMatch::bookmark_value_ = 1; | 39 int ScoredHistoryMatch::bookmark_value_ = 1; |
38 bool ScoredHistoryMatch::allow_tld_matches_ = false; | 40 bool ScoredHistoryMatch::allow_tld_matches_ = false; |
39 bool ScoredHistoryMatch::allow_scheme_matches_ = false; | 41 bool ScoredHistoryMatch::allow_scheme_matches_ = false; |
40 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; | 42 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; |
41 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; | 43 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; |
44 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_ = false; | |
45 float ScoredHistoryMatch::topicality_threshold_ = -1; | |
46 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* | |
47 ScoredHistoryMatch::hqp_relevance_buckets_ = NULL; | |
42 | 48 |
43 ScoredHistoryMatch::ScoredHistoryMatch() | 49 ScoredHistoryMatch::ScoredHistoryMatch() |
44 : raw_score_(0), | 50 : raw_score_(0), |
45 can_inline_(false) { | 51 can_inline_(false) { |
46 Init(); | 52 Init(); |
47 } | 53 } |
48 | 54 |
49 ScoredHistoryMatch::ScoredHistoryMatch( | 55 ScoredHistoryMatch::ScoredHistoryMatch( |
50 const URLRow& row, | 56 const URLRow& row, |
51 const VisitInfoVector& visits, | 57 const VisitInfoVector& visits, |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
147 const int num_components_in_best_inlineable_prefix = | 153 const int num_components_in_best_inlineable_prefix = |
148 best_inlineable_prefix->num_components; | 154 best_inlineable_prefix->num_components; |
149 innermost_match = (num_components_in_best_inlineable_prefix == | 155 innermost_match = (num_components_in_best_inlineable_prefix == |
150 num_components_in_best_prefix); | 156 num_components_in_best_prefix); |
151 } | 157 } |
152 | 158 |
153 const float topicality_score = GetTopicalityScore( | 159 const float topicality_score = GetTopicalityScore( |
154 terms.size(), url, terms_to_word_starts_offsets, word_starts); | 160 terms.size(), url, terms_to_word_starts_offsets, word_starts); |
155 const float frequency_score = GetFrequency( | 161 const float frequency_score = GetFrequency( |
156 now, (history_client && history_client->IsBookmarked(gurl)), visits); | 162 now, (history_client && history_client->IsBookmarked(gurl)), visits); |
157 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score); | 163 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score, |
164 *hqp_relevance_buckets_); | |
158 raw_score_ = | 165 raw_score_ = |
159 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; | 166 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; |
160 | 167 |
161 if (also_do_hup_like_scoring_ && can_inline_) { | 168 if (also_do_hup_like_scoring_ && can_inline_) { |
162 // HistoryURL-provider-like scoring gives any match that is | 169 // HistoryURL-provider-like scoring gives any match that is |
163 // capable of being inlined a certain minimum score. Some of these | 170 // capable of being inlined a certain minimum score. Some of these |
164 // are given a higher score that lets them be shown in inline. | 171 // are given a higher score that lets them be shown in inline. |
165 // This test here derives from the test in | 172 // This test here derives from the test in |
166 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). | 173 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). |
167 const bool promote_to_inline = (row.typed_count() > 1) || | 174 const bool promote_to_inline = (row.typed_count() > 1) || |
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
431 // due to this test would look stupid if shown to the user. | 438 // due to this test would look stupid if shown to the user. |
432 if (term_scores[i] == 0) | 439 if (term_scores[i] == 0) |
433 return 0; | 440 return 0; |
434 topicality_score += raw_term_score_to_topicality_score_[ | 441 topicality_score += raw_term_score_to_topicality_score_[ |
435 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) : | 442 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) : |
436 term_scores[i]]; | 443 term_scores[i]]; |
437 } | 444 } |
438 // TODO(mpearson): If there are multiple terms, consider taking the | 445 // TODO(mpearson): If there are multiple terms, consider taking the |
439 // geometric mean of per-term scores rather than the arithmetic mean. | 446 // geometric mean of per-term scores rather than the arithmetic mean. |
440 | 447 |
441 return topicality_score / num_terms; | 448 float final_topicality_score = topicality_score / num_terms; |
449 | |
450 // Demote all the URLs if the topicality score is less than threshold. | |
451 if (hqp_experimental_scoring_enabled_ && | |
452 (final_topicality_score < topicality_threshold_)) { | |
453 return 0.0; | |
454 } | |
455 | |
456 return final_topicality_score; | |
442 } | 457 } |
443 | 458 |
444 // static | 459 // static |
445 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() { | 460 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() { |
446 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) { | 461 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) { |
447 float topicality_score; | 462 float topicality_score; |
448 if (term_score < 10) { | 463 if (term_score < 10) { |
449 // If the term scores less than 10 points (no full-credit hit, or | 464 // If the term scores less than 10 points (no full-credit hit, or |
450 // no combination of hits that score that well), then the topicality | 465 // no combination of hits that score that well), then the topicality |
451 // score is linear in the term score. | 466 // score is linear in the term score. |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
535 if (bookmarked) | 550 if (bookmarked) |
536 value_of_transition = std::max(value_of_transition, bookmark_value_); | 551 value_of_transition = std::max(value_of_transition, bookmark_value_); |
537 const float bucket_weight = | 552 const float bucket_weight = |
538 GetRecencyScore((now - visits[i].first).InDays()); | 553 GetRecencyScore((now - visits[i].first).InDays()); |
539 summed_visit_points += (value_of_transition * bucket_weight); | 554 summed_visit_points += (value_of_transition * bucket_weight); |
540 } | 555 } |
541 return visits.size() * summed_visit_points / kMaxVisitsToScore; | 556 return visits.size() * summed_visit_points / kMaxVisitsToScore; |
542 } | 557 } |
543 | 558 |
544 // static | 559 // static |
545 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, | 560 float ScoredHistoryMatch::GetFinalRelevancyScore( |
546 float frequency_score) { | 561 float topicality_score, |
562 float frequency_score, | |
563 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { | |
564 DCHECK(hqp_relevance_buckets.size() > 0); | |
565 | |
547 if (topicality_score == 0) | 566 if (topicality_score == 0) |
548 return 0; | 567 return 0; |
549 // Here's how to interpret intermediate_score: Suppose the omnibox | 568 // Here's how to interpret intermediate_score: Suppose the omnibox |
550 // has one input term. Suppose we have a URL for which the omnibox | 569 // has one input term. Suppose we have a URL for which the omnibox |
551 // input term has a single URL hostname hit at a word boundary. (This | 570 // input term has a single URL hostname hit at a word boundary. (This |
552 // implies topicality_score = 1.0.). Then the intermediate_score for | 571 // implies topicality_score = 1.0.). Then the intermediate_score for |
553 // this URL will depend entirely on the frequency_score with | 572 // this URL will depend entirely on the frequency_score with |
554 // this interpretation: | 573 // this interpretation: |
555 // - a single typed visit more than three months ago, no other visits -> 0.2 | 574 // - a single typed visit more than three months ago, no other visits -> 0.2 |
556 // - a visit every three days, no typed visits -> 0.706 | 575 // - a visit every three days, no typed visits -> 0.706 |
557 // - a visit every day, no typed visits -> 0.916 | 576 // - a visit every day, no typed visits -> 0.916 |
558 // - a single typed visit yesterday, no other visits -> 2.0 | 577 // - a single typed visit yesterday, no other visits -> 2.0 |
559 // - a typed visit once a week -> 11.77 | 578 // - a typed visit once a week -> 11.77 |
560 // - a typed visit every three days -> 14.12 | 579 // - a typed visit every three days -> 14.12 |
561 // - at least ten typed visits today -> 20.0 (maximum score) | 580 // - at least ten typed visits today -> 20.0 (maximum score) |
581 // | |
582 // The below code maps intermediate_score to the range [0, 1399]. | |
583 // For example: | |
584 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" | |
585 // We will linearly interpolate the scores between: | |
586 // 0 to 1.5 --> 400 to 600 | |
587 // 1.5 to 12.0 --> 600 to 1300 | |
588 // 12.0 to 20.0 --> 1300 to 1399 | |
589 // >= 20.0 --> 1399 | |
590 // | |
591 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result | |
592 // from HistoryURL provider) | |
Mark P
2015/02/18 00:03:32
nit: period.
Ashok vardhan
2015/02/18 01:11:21
Done.
| |
562 const float intermediate_score = topicality_score * frequency_score; | 593 const float intermediate_score = topicality_score * frequency_score; |
563 // The below code maps intermediate_score to the range [0, 1399]. | 594 |
564 // The score maxes out at 1400 (i.e., cannot beat a good inline result). | 595 double min_intermediate_score; |
Mark P
2015/02/18 00:03:32
If intermediate_score == 0, you end up using this
Ashok vardhan
2015/02/18 01:11:21
Yes, i have added ">=" after refactoring. You can
| |
565 if (intermediate_score <= 1) { | 596 float min_hqp_score; |
566 // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400 | 597 // Find the threshold where intermediate score is greater than bucket. |
567 // and 1.5 has a score of 600. | 598 for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) { |
568 const float slope = (600 - 400) / (1.5f - 0.0f); | 599 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i]; |
569 return 400 + slope * intermediate_score; | 600 if (intermediate_score > hqp_bucket.first) { |
601 min_intermediate_score = hqp_bucket.first; | |
602 min_hqp_score = hqp_bucket.second; | |
603 } else { | |
604 const float slope = ((hqp_bucket.second - min_hqp_score) / | |
605 (hqp_bucket.first - min_intermediate_score)); | |
606 const float final_hqp_score = (min_hqp_score + | |
607 (slope * (intermediate_score - | |
608 min_intermediate_score))); | |
609 return final_hqp_score; | |
Mark P
2015/02/18 00:03:32
nit: simply return final_hqp_score; no need to use
Ashok vardhan
2015/02/18 01:11:21
Done.
| |
610 } | |
570 } | 611 } |
571 if (intermediate_score <= 12.0) { | 612 // It will reach this stage when the score is > highest bucket score. |
572 // Linearly extrapolate up to 12 so 12 has a score of 1300. | 613 return min_hqp_score; |
573 const float slope = (1300 - 600) / (12.0f - 1.5f); | |
574 return 600 + slope * (intermediate_score - 1.5); | |
575 } | |
576 // Linearly extrapolate so a score of 20 (or more) has a score of 1399. | |
577 // (Scores above 20 are possible for URLs that have multiple term hits | |
578 // in the URL and/or title and that are visited practically all | |
579 // the time using typed visits. We don't attempt to distinguish | |
580 // between these very good results.) | |
581 const float slope = (1399 - 1300) / (20.0f - 12.0f); | |
582 return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0)); | |
583 } | 614 } |
584 | 615 |
616 // static | |
617 void ScoredHistoryMatch::InitializeHQPExperimentalParams() { | |
618 // These are default HQP relevance scoring buckets. | |
619 // See GetFinalRelevancyScore() for details. | |
620 std::string hqp_relevance_buckets_str = "0.0:400,1.5:600,12.0:1300,20.0:1399"; | |
621 | |
622 // Fetch the experiment params if they are any. | |
623 hqp_experimental_scoring_enabled_ = | |
624 OmniboxFieldTrial::HQPExperimentalScoringEnabled(); | |
625 | |
626 if (hqp_experimental_scoring_enabled_) { | |
627 // Add the topicality threshold from experiment params. | |
628 float hqp_experimental_topicality_threhold = | |
629 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold(); | |
630 topicality_threshold_ = hqp_experimental_topicality_threhold; | |
631 | |
632 // Add the HQP experimental scoring buckets. | |
633 std::string hqp_experimental_scoring_buckets = | |
634 OmniboxFieldTrial::HQPExperimentalScoringBuckets(); | |
635 if (!hqp_experimental_scoring_buckets.empty()) | |
636 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets; | |
637 } | |
638 | |
639 // Parse the hqp_relevance_buckets_str string once and store them in vector | |
640 // which is easy to access. | |
641 hqp_relevance_buckets_ = | |
642 new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>(); | |
643 base::StringPairs kv_pairs; | |
644 if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str, | |
645 ':', ',', &kv_pairs)) { | |
646 for (base::StringPairs::const_iterator it = kv_pairs.begin(); | |
647 it != kv_pairs.end(); ++it) { | |
648 ScoreMaxRelevance bucket; | |
649 base::StringToDouble(it->first, &bucket.first); | |
Mark P
2015/02/18 00:03:32
DCHECKS are not executed in non-debug builds, mean
Ashok vardhan
2015/02/18 01:11:21
I see, thanks.!
| |
650 base::StringToInt(it->second, &bucket.second); | |
651 hqp_relevance_buckets_->push_back(bucket); | |
652 } | |
653 } | |
654 } | |
655 | |
656 // static | |
585 void ScoredHistoryMatch::Init() { | 657 void ScoredHistoryMatch::Init() { |
586 if (initialized_) | 658 if (initialized_) |
587 return; | 659 return; |
588 also_do_hup_like_scoring_ = false; | 660 also_do_hup_like_scoring_ = false; |
589 // When doing HUP-like scoring, don't allow a non-inlineable match | 661 // When doing HUP-like scoring, don't allow a non-inlineable match |
590 // to beat the score of good inlineable matches. This is a problem | 662 // to beat the score of good inlineable matches. This is a problem |
591 // because if a non-inlineable match ends up with the highest score | 663 // because if a non-inlineable match ends up with the highest score |
592 // from HistoryQuick provider, all HistoryQuick matches get demoted | 664 // from HistoryQuick provider, all HistoryQuick matches get demoted |
593 // to non-inlineable scores (scores less than 1200). Without | 665 // to non-inlineable scores (scores less than 1200). Without |
594 // HUP-like-scoring, these results would actually come from the HUP | 666 // HUP-like-scoring, these results would actually come from the HUP |
595 // and not be demoted, thus outscoring the demoted HQP results. | 667 // and not be demoted, thus outscoring the demoted HQP results. |
596 // When the HQP provides these, we need to clamp the non-inlineable | 668 // When the HQP provides these, we need to clamp the non-inlineable |
597 // results to preserve this behavior. | 669 // results to preserve this behavior. |
598 if (also_do_hup_like_scoring_) { | 670 if (also_do_hup_like_scoring_) { |
599 max_assigned_score_for_non_inlineable_matches_ = | 671 max_assigned_score_for_non_inlineable_matches_ = |
600 HistoryURLProvider::kScoreForBestInlineableResult - 1; | 672 HistoryURLProvider::kScoreForBestInlineableResult - 1; |
601 } | 673 } |
602 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); | 674 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
603 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); | 675 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
604 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); | 676 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
677 | |
678 // Initialize the HQP Experimental scoring params. | |
679 InitializeHQPExperimentalParams(); | |
680 | |
605 initialized_ = true; | 681 initialized_ = true; |
606 } | 682 } |
607 | 683 |
608 } // namespace history | 684 } // namespace history |
OLD | NEW |