Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/scored_history_match.h" | 5 #include "chrome/browser/history/scored_history_match.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <numeric> | 10 #include <numeric> |
| 11 #include <set> | 11 #include <set> |
| 12 | 12 |
| 13 #include <math.h> | 13 #include <math.h> |
| 14 | 14 |
| 15 #include "base/logging.h" | 15 #include "base/logging.h" |
| 16 #include "base/metrics/histogram.h" | 16 #include "base/metrics/histogram.h" |
| 17 #include "base/strings/string_number_conversions.h" | |
| 18 #include "base/strings/string_split.h" | |
| 17 #include "base/strings/string_util.h" | 19 #include "base/strings/string_util.h" |
| 18 #include "base/strings/utf_string_conversions.h" | 20 #include "base/strings/utf_string_conversions.h" |
| 19 #include "chrome/browser/autocomplete/history_url_provider.h" | 21 #include "chrome/browser/autocomplete/history_url_provider.h" |
| 20 #include "components/bookmarks/browser/bookmark_utils.h" | 22 #include "components/bookmarks/browser/bookmark_utils.h" |
| 21 #include "components/history/core/browser/history_client.h" | 23 #include "components/history/core/browser/history_client.h" |
| 22 #include "components/omnibox/omnibox_field_trial.h" | 24 #include "components/omnibox/omnibox_field_trial.h" |
| 23 #include "components/omnibox/url_prefix.h" | 25 #include "components/omnibox/url_prefix.h" |
| 24 #include "content/public/browser/browser_thread.h" | 26 #include "content/public/browser/browser_thread.h" |
| 25 | 27 |
| 26 namespace history { | 28 namespace history { |
| 27 | 29 |
| 28 // ScoredHistoryMatch ---------------------------------------------------------- | 30 // ScoredHistoryMatch ---------------------------------------------------------- |
| 29 | 31 |
| 30 // static | 32 // static |
| 31 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10; | 33 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10; |
| 32 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366; | 34 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366; |
| 33 const int ScoredHistoryMatch::kMaxRawTermScore = 30; | 35 const int ScoredHistoryMatch::kMaxRawTermScore = 30; |
| 34 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL; | 36 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL; |
| 35 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL; | 37 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL; |
| 36 bool ScoredHistoryMatch::initialized_ = false; | 38 bool ScoredHistoryMatch::initialized_ = false; |
| 37 int ScoredHistoryMatch::bookmark_value_ = 1; | 39 int ScoredHistoryMatch::bookmark_value_ = 1; |
| 38 bool ScoredHistoryMatch::allow_tld_matches_ = false; | 40 bool ScoredHistoryMatch::allow_tld_matches_ = false; |
| 39 bool ScoredHistoryMatch::allow_scheme_matches_ = false; | 41 bool ScoredHistoryMatch::allow_scheme_matches_ = false; |
| 40 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; | 42 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; |
| 41 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; | 43 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; |
| 42 | 44 |
| 45 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_ = false; | |
|
Mark P
2015/02/14 01:27:14
(1) These static initializers should be in the ord
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
| 46 float ScoredHistoryMatch::topicality_threshold_ = -1; | |
| 47 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* | |
| 48 ScoredHistoryMatch::hqp_relevance_buckets_ = NULL; | |
| 49 | |
| 43 ScoredHistoryMatch::ScoredHistoryMatch() | 50 ScoredHistoryMatch::ScoredHistoryMatch() |
| 44 : raw_score_(0), | 51 : raw_score_(0), |
| 45 can_inline_(false) { | 52 can_inline_(false) { |
| 46 Init(); | 53 Init(); |
| 47 } | 54 } |
| 48 | 55 |
| 49 ScoredHistoryMatch::ScoredHistoryMatch( | 56 ScoredHistoryMatch::ScoredHistoryMatch( |
| 50 const URLRow& row, | 57 const URLRow& row, |
| 51 const VisitInfoVector& visits, | 58 const VisitInfoVector& visits, |
| 52 const std::string& languages, | 59 const std::string& languages, |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 147 const int num_components_in_best_inlineable_prefix = | 154 const int num_components_in_best_inlineable_prefix = |
| 148 best_inlineable_prefix->num_components; | 155 best_inlineable_prefix->num_components; |
| 149 innermost_match = (num_components_in_best_inlineable_prefix == | 156 innermost_match = (num_components_in_best_inlineable_prefix == |
| 150 num_components_in_best_prefix); | 157 num_components_in_best_prefix); |
| 151 } | 158 } |
| 152 | 159 |
| 153 const float topicality_score = GetTopicalityScore( | 160 const float topicality_score = GetTopicalityScore( |
| 154 terms.size(), url, terms_to_word_starts_offsets, word_starts); | 161 terms.size(), url, terms_to_word_starts_offsets, word_starts); |
| 155 const float frequency_score = GetFrequency( | 162 const float frequency_score = GetFrequency( |
| 156 now, (history_client && history_client->IsBookmarked(gurl)), visits); | 163 now, (history_client && history_client->IsBookmarked(gurl)), visits); |
| 157 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score); | 164 raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score, |
| 165 *hqp_relevance_buckets_); | |
| 158 raw_score_ = | 166 raw_score_ = |
| 159 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; | 167 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; |
| 160 | 168 |
| 161 if (also_do_hup_like_scoring_ && can_inline_) { | 169 if (also_do_hup_like_scoring_ && can_inline_) { |
| 162 // HistoryURL-provider-like scoring gives any match that is | 170 // HistoryURL-provider-like scoring gives any match that is |
| 163 // capable of being inlined a certain minimum score. Some of these | 171 // capable of being inlined a certain minimum score. Some of these |
| 164 // are given a higher score that lets them be shown in inline. | 172 // are given a higher score that lets them be shown in inline. |
| 165 // This test here derives from the test in | 173 // This test here derives from the test in |
| 166 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). | 174 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). |
| 167 const bool promote_to_inline = (row.typed_count() > 1) || | 175 const bool promote_to_inline = (row.typed_count() > 1) || |
| (...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 431 // due to this test would look stupid if shown to the user. | 439 // due to this test would look stupid if shown to the user. |
| 432 if (term_scores[i] == 0) | 440 if (term_scores[i] == 0) |
| 433 return 0; | 441 return 0; |
| 434 topicality_score += raw_term_score_to_topicality_score_[ | 442 topicality_score += raw_term_score_to_topicality_score_[ |
| 435 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) : | 443 (term_scores[i] >= kMaxRawTermScore) ? (kMaxRawTermScore - 1) : |
| 436 term_scores[i]]; | 444 term_scores[i]]; |
| 437 } | 445 } |
| 438 // TODO(mpearson): If there are multiple terms, consider taking the | 446 // TODO(mpearson): If there are multiple terms, consider taking the |
| 439 // geometric mean of per-term scores rather than the arithmetic mean. | 447 // geometric mean of per-term scores rather than the arithmetic mean. |
| 440 | 448 |
| 441 return topicality_score / num_terms; | 449 float final_topicality_score = topicality_score / num_terms; |
| 450 | |
| 451 // Demote all the URLs if the topicality score is less than threshold. | |
| 452 if (hqp_experimental_scoring_enabled_ && | |
| 453 (final_topicality_score < topicality_threshold_)) { | |
| 454 return 0.0; | |
| 455 } | |
| 456 | |
| 457 return final_topicality_score; | |
| 442 } | 458 } |
| 443 | 459 |
| 444 // static | 460 // static |
| 445 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() { | 461 void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() { |
| 446 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) { | 462 for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) { |
| 447 float topicality_score; | 463 float topicality_score; |
| 448 if (term_score < 10) { | 464 if (term_score < 10) { |
| 449 // If the term scores less than 10 points (no full-credit hit, or | 465 // If the term scores less than 10 points (no full-credit hit, or |
| 450 // no combination of hits that score that well), then the topicality | 466 // no combination of hits that score that well), then the topicality |
| 451 // score is linear in the term score. | 467 // score is linear in the term score. |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 535 if (bookmarked) | 551 if (bookmarked) |
| 536 value_of_transition = std::max(value_of_transition, bookmark_value_); | 552 value_of_transition = std::max(value_of_transition, bookmark_value_); |
| 537 const float bucket_weight = | 553 const float bucket_weight = |
| 538 GetRecencyScore((now - visits[i].first).InDays()); | 554 GetRecencyScore((now - visits[i].first).InDays()); |
| 539 summed_visit_points += (value_of_transition * bucket_weight); | 555 summed_visit_points += (value_of_transition * bucket_weight); |
| 540 } | 556 } |
| 541 return visits.size() * summed_visit_points / kMaxVisitsToScore; | 557 return visits.size() * summed_visit_points / kMaxVisitsToScore; |
| 542 } | 558 } |
| 543 | 559 |
| 544 // static | 560 // static |
| 545 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, | 561 float ScoredHistoryMatch::GetFinalRelevancyScore( |
| 546 float frequency_score) { | 562 float topicality_score, |
| 563 float frequency_score, | |
| 564 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { | |
| 547 if (topicality_score == 0) | 565 if (topicality_score == 0) |
| 548 return 0; | 566 return 0; |
| 549 // Here's how to interpret intermediate_score: Suppose the omnibox | 567 // Here's how to interpret intermediate_score: Suppose the omnibox |
| 550 // has one input term. Suppose we have a URL for which the omnibox | 568 // has one input term. Suppose we have a URL for which the omnibox |
| 551 // input term has a single URL hostname hit at a word boundary. (This | 569 // input term has a single URL hostname hit at a word boundary. (This |
| 552 // implies topicality_score = 1.0.). Then the intermediate_score for | 570 // implies topicality_score = 1.0.). Then the intermediate_score for |
| 553 // this URL will depend entirely on the frequency_score with | 571 // this URL will depend entirely on the frequency_score with |
| 554 // this interpretation: | 572 // this interpretation: |
| 555 // - a single typed visit more than three months ago, no other visits -> 0.2 | 573 // - a single typed visit more than three months ago, no other visits -> 0.2 |
| 556 // - a visit every three days, no typed visits -> 0.706 | 574 // - a visit every three days, no typed visits -> 0.706 |
| 557 // - a visit every day, no typed visits -> 0.916 | 575 // - a visit every day, no typed visits -> 0.916 |
| 558 // - a single typed visit yesterday, no other visits -> 2.0 | 576 // - a single typed visit yesterday, no other visits -> 2.0 |
| 559 // - a typed visit once a week -> 11.77 | 577 // - a typed visit once a week -> 11.77 |
| 560 // - a typed visit every three days -> 14.12 | 578 // - a typed visit every three days -> 14.12 |
| 561 // - at least ten typed visits today -> 20.0 (maximum score) | 579 // - at least ten typed visits today -> 20.0 (maximum score) |
| 580 // | |
| 581 // The below code maps intermediate_score to the range [0, 1399]. | |
| 582 // For example: | |
| 583 // HQP default scoring buckets: "1.5:600,12.0:1300,20.0:1399" | |
|
Mark P
2015/02/14 01:27:13
I think you'll want your format to require having
Ashok vardhan
2015/02/17 01:23:52
Make sense. Thought all the minimum scores for pro
| |
| 584 // We will linearly interpolate the scores between: | |
| 585 // 0 to 1.5 --> 400 to 600 | |
| 586 // 1.5 to 12.0 --> 600 to 1300 | |
| 587 // 12.0 to 20.0 --> 1300 to 1399 | |
| 588 // >= 20.0 --> 1399 | |
| 589 // | |
| 590 // The score maxes out at 1400 (i.e., cannot beat a good inline result). | |
|
Mark P
2015/02/14 01:27:14
1400 -> 1399
also, good inline result -> good inli
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
| 591 // | |
| 592 // If experimental scoring is enabled, then the score buckets will be like: | |
|
Mark P
2015/02/14 01:27:14
I don't know what line 592-593 are adding. Is it
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
| 593 // HQP experimental scoring buckets: "1.5:600,5.0:900,12.0:1100,20.0:1300" | |
| 562 const float intermediate_score = topicality_score * frequency_score; | 594 const float intermediate_score = topicality_score * frequency_score; |
| 563 // The below code maps intermediate_score to the range [0, 1399]. | 595 |
| 564 // The score maxes out at 1400 (i.e., cannot beat a good inline result). | 596 double base_intermediate_score = 0.0; |
|
Mark P
2015/02/14 01:27:14
Please comment the variables in this block more or
Ashok vardhan
2015/02/17 01:23:52
Acknowledged.
| |
| 565 if (intermediate_score <= 1) { | 597 int base_hqp_score = 400; |
| 566 // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400 | 598 double max_intermediate_score = base_intermediate_score; |
| 567 // and 1.5 has a score of 600. | 599 int max_hqp_score = base_hqp_score; |
| 568 const float slope = (600 - 400) / (1.5f - 0.0f); | 600 |
| 569 return 400 + slope * intermediate_score; | 601 // Find the threshold where intermediate score is greater than bucket. |
| 602 for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) { | |
| 603 ScoreMaxRelevance hqp_bucket = hqp_relevance_buckets[i]; | |
|
Mark P
2015/02/14 01:27:14
const &
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
| 604 max_intermediate_score = hqp_bucket.first; | |
| 605 max_hqp_score = hqp_bucket.second; | |
| 606 if (intermediate_score <= max_intermediate_score) { | |
| 607 const float slope = ( | |
| 608 (max_hqp_score - base_hqp_score) / | |
| 609 (max_intermediate_score - base_intermediate_score)); | |
| 610 const int final_hqp_score = (base_hqp_score + | |
|
Mark P
2015/02/14 01:27:13
does the whole right side of the equals fit on one
Mark P
2015/02/14 01:27:14
This function returns a float. Don't force it to
Ashok vardhan
2015/02/17 01:23:52
Nope. And also i guess its easy to read if it is l
Ashok vardhan
2015/02/17 01:23:52
Done.
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
| 611 (slope * (intermediate_score - | |
| 612 base_intermediate_score))); | |
| 613 return std::min(final_hqp_score, max_hqp_score); | |
|
Mark P
2015/02/14 01:27:13
Why is this min necessary?
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
| 614 } | |
| 615 base_intermediate_score = max_intermediate_score; | |
| 616 base_hqp_score = max_hqp_score; | |
| 570 } | 617 } |
| 571 if (intermediate_score <= 12.0) { | 618 // It will reach this stage when the score is > highest bucket score or |
| 572 // Linearly extrapolate up to 12 so 12 has a score of 1300. | 619 // when buckets are not specified. Return max_hqp_score. |
|
Mark P
2015/02/14 01:27:14
Buckets should never be not specified. Correct th
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
| 573 const float slope = (1300 - 600) / (12.0f - 1.5f); | 620 return max_hqp_score; |
| 574 return 600 + slope * (intermediate_score - 1.5); | |
| 575 } | |
| 576 // Linearly extrapolate so a score of 20 (or more) has a score of 1399. | |
| 577 // (Scores above 20 are possible for URLs that have multiple term hits | |
| 578 // in the URL and/or title and that are visited practically all | |
| 579 // the time using typed visits. We don't attempt to distinguish | |
| 580 // between these very good results.) | |
| 581 const float slope = (1399 - 1300) / (20.0f - 12.0f); | |
| 582 return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0)); | |
| 583 } | 621 } |
| 584 | 622 |
| 623 // static | |
| 624 void ScoredHistoryMatch::InitializeHQPExperimentalParams() { | |
| 625 // These are default HQP relevance scoring buckets. | |
| 626 // See GetFinalRelevancyScore() for details. | |
| 627 std::string hqp_relevance_buckets_str = "1.5:600,12.0:1300,20.0:1399"; | |
| 628 | |
| 629 // Fetch the experiment params if they are any. | |
| 630 hqp_experimental_scoring_enabled_ = | |
| 631 OmniboxFieldTrial::HQPExperimentalScoringEnabled(); | |
| 632 | |
| 633 if (hqp_experimental_scoring_enabled_) { | |
| 634 // Add the topicality threshold from experiment params. | |
| 635 float hqp_experimental_topicality_threhold = | |
| 636 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold(); | |
| 637 if (hqp_experimental_topicality_threhold > 0) | |
|
Mark P
2015/02/14 01:27:14
Why have a >0 test? If you're checking for unspec
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
| 638 topicality_threshold_ = hqp_experimental_topicality_threhold; | |
| 639 | |
| 640 // Add the HQP experimental scoring buckets. | |
| 641 std::string hqp_experimental_scoring_buckets = | |
| 642 OmniboxFieldTrial::HQPExperimentalScoringBuckets(); | |
| 643 if (!hqp_experimental_scoring_buckets.empty()) | |
| 644 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets; | |
| 645 } | |
| 646 | |
| 647 // Parse the hqp_relevance_buckets_str string once and store them in vector | |
| 648 // which is easy to access. | |
| 649 hqp_relevance_buckets_ = | |
| 650 new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>(); | |
| 651 base::StringPairs kv_pairs; | |
| 652 if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str, | |
| 653 ':', ',', &kv_pairs)) { | |
| 654 for (base::StringPairs::const_iterator it = kv_pairs.begin(); | |
| 655 it != kv_pairs.end(); ++it) { | |
| 656 ScoreMaxRelevance bucket; | |
| 657 base::StringToDouble(it->first, &bucket.first); | |
|
Mark P
2015/02/14 01:27:14
minor nit: consider DCHECKING the return value for
Ashok vardhan
2015/02/17 01:23:52
DCHECK seems to be not working here.
std::strin
| |
| 658 base::StringToInt(it->second, &bucket.second); | |
| 659 hqp_relevance_buckets_->push_back(bucket); | |
| 660 } | |
| 661 } | |
| 662 } | |
| 663 | |
| 664 // static | |
| 585 void ScoredHistoryMatch::Init() { | 665 void ScoredHistoryMatch::Init() { |
| 586 if (initialized_) | 666 if (initialized_) |
| 587 return; | 667 return; |
| 588 also_do_hup_like_scoring_ = false; | 668 also_do_hup_like_scoring_ = false; |
| 589 // When doing HUP-like scoring, don't allow a non-inlineable match | 669 // When doing HUP-like scoring, don't allow a non-inlineable match |
| 590 // to beat the score of good inlineable matches. This is a problem | 670 // to beat the score of good inlineable matches. This is a problem |
| 591 // because if a non-inlineable match ends up with the highest score | 671 // because if a non-inlineable match ends up with the highest score |
| 592 // from HistoryQuick provider, all HistoryQuick matches get demoted | 672 // from HistoryQuick provider, all HistoryQuick matches get demoted |
| 593 // to non-inlineable scores (scores less than 1200). Without | 673 // to non-inlineable scores (scores less than 1200). Without |
| 594 // HUP-like-scoring, these results would actually come from the HUP | 674 // HUP-like-scoring, these results would actually come from the HUP |
| 595 // and not be demoted, thus outscoring the demoted HQP results. | 675 // and not be demoted, thus outscoring the demoted HQP results. |
| 596 // When the HQP provides these, we need to clamp the non-inlineable | 676 // When the HQP provides these, we need to clamp the non-inlineable |
| 597 // results to preserve this behavior. | 677 // results to preserve this behavior. |
| 598 if (also_do_hup_like_scoring_) { | 678 if (also_do_hup_like_scoring_) { |
| 599 max_assigned_score_for_non_inlineable_matches_ = | 679 max_assigned_score_for_non_inlineable_matches_ = |
| 600 HistoryURLProvider::kScoreForBestInlineableResult - 1; | 680 HistoryURLProvider::kScoreForBestInlineableResult - 1; |
| 601 } | 681 } |
| 602 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); | 682 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
| 603 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); | 683 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
| 604 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); | 684 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
| 685 | |
| 686 // Initialize the HQP Experimental scoring params. | |
| 687 InitializeHQPExperimentalParams(); | |
| 688 | |
| 605 initialized_ = true; | 689 initialized_ = true; |
| 606 } | 690 } |
| 607 | 691 |
| 608 } // namespace history | 692 } // namespace history |
| OLD | NEW |