| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/scored_history_match.h" | 5 #include "chrome/browser/history/scored_history_match.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <numeric> | 10 #include <numeric> |
| 11 #include <set> | 11 #include <set> |
| 12 | 12 |
| 13 #include <math.h> | 13 #include <math.h> |
| 14 | 14 |
| 15 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
| 16 #include "base/strings/string_util.h" | 16 #include "base/strings/string_util.h" |
| 17 #include "base/strings/utf_string_conversions.h" | 17 #include "base/strings/utf_string_conversions.h" |
| 18 #include "chrome/browser/autocomplete/history_url_provider.h" | 18 #include "chrome/browser/autocomplete/history_url_provider.h" |
| 19 #include "chrome/browser/autocomplete/url_prefix.h" | 19 #include "chrome/browser/autocomplete/url_prefix.h" |
| 20 #include "chrome/browser/bookmarks/bookmark_service.h" | 20 #include "chrome/browser/bookmarks/bookmark_service.h" |
| 21 #include "chrome/browser/omnibox/omnibox_field_trial.h" |
| 21 #include "chrome/common/chrome_switches.h" | 22 #include "chrome/common/chrome_switches.h" |
| 22 #include "content/public/browser/browser_thread.h" | 23 #include "content/public/browser/browser_thread.h" |
| 23 | 24 |
| 24 namespace history { | 25 namespace history { |
| 25 | 26 |
| 26 // ScoredHistoryMatch ---------------------------------------------------------- | 27 // ScoredHistoryMatch ---------------------------------------------------------- |
| 27 | 28 |
| 28 // static | 29 // static |
| 29 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10u; | 30 const size_t ScoredHistoryMatch::kMaxVisitsToScore = 10u; |
| 30 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366; | 31 const int ScoredHistoryMatch::kDaysToPrecomputeRecencyScoresFor = 366; |
| 31 const int ScoredHistoryMatch::kMaxRawTermScore = 30; | 32 const int ScoredHistoryMatch::kMaxRawTermScore = 30; |
| 32 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL; | 33 float* ScoredHistoryMatch::raw_term_score_to_topicality_score_ = NULL; |
| 33 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL; | 34 float* ScoredHistoryMatch::days_ago_to_recency_score_ = NULL; |
| 34 bool ScoredHistoryMatch::initialized_ = false; | 35 bool ScoredHistoryMatch::initialized_ = false; |
| 36 int ScoredHistoryMatch::bookmark_value_ = 1; |
| 35 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; | 37 bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; |
| 36 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; | 38 int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; |
| 37 | 39 |
| 38 ScoredHistoryMatch::ScoredHistoryMatch() | 40 ScoredHistoryMatch::ScoredHistoryMatch() |
| 39 : raw_score_(0), | 41 : raw_score_(0), |
| 40 can_inline_(false) { | 42 can_inline_(false) { |
| 41 if (!initialized_) { | 43 Init(); |
| 42 InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField(); | |
| 43 initialized_ = true; | |
| 44 } | |
| 45 } | 44 } |
| 46 | 45 |
| 47 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, | 46 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, |
| 48 const VisitInfoVector& visits, | 47 const VisitInfoVector& visits, |
| 49 const std::string& languages, | 48 const std::string& languages, |
| 50 const string16& lower_string, | 49 const string16& lower_string, |
| 51 const String16Vector& terms, | 50 const String16Vector& terms, |
| 52 const RowWordStarts& word_starts, | 51 const RowWordStarts& word_starts, |
| 53 const base::Time now, | 52 const base::Time now, |
| 54 BookmarkService* bookmark_service) | 53 BookmarkService* bookmark_service) |
| 55 : HistoryMatch(row, 0, false, false), | 54 : HistoryMatch(row, 0, false, false), |
| 56 raw_score_(0), | 55 raw_score_(0), |
| 57 can_inline_(false) { | 56 can_inline_(false) { |
| 58 if (!initialized_) { | 57 Init(); |
| 59 InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField(); | |
| 60 initialized_ = true; | |
| 61 } | |
| 62 | 58 |
| 63 GURL gurl = row.url(); | 59 GURL gurl = row.url(); |
| 64 if (!gurl.is_valid()) | 60 if (!gurl.is_valid()) |
| 65 return; | 61 return; |
| 66 | 62 |
| 67 // Figure out where each search term appears in the URL and/or page title | 63 // Figure out where each search term appears in the URL and/or page title |
| 68 // so that we can score as well as provide autocomplete highlighting. | 64 // so that we can score as well as provide autocomplete highlighting. |
| 69 string16 url = CleanUpUrlForMatching(gurl, languages); | 65 string16 url = CleanUpUrlForMatching(gurl, languages); |
| 70 string16 title = CleanUpTitleForMatching(row.title()); | 66 string16 title = CleanUpTitleForMatching(row.title()); |
| 71 int term_num = 0; | 67 int term_num = 0; |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 143 // makes it inlineable may be empty. | 139 // makes it inlineable may be empty. |
| 144 DCHECK(best_inlineable_prefix != NULL); | 140 DCHECK(best_inlineable_prefix != NULL); |
| 145 const int num_components_in_best_inlineable_prefix = | 141 const int num_components_in_best_inlineable_prefix = |
| 146 best_inlineable_prefix->num_components; | 142 best_inlineable_prefix->num_components; |
| 147 innermost_match = (num_components_in_best_inlineable_prefix == | 143 innermost_match = (num_components_in_best_inlineable_prefix == |
| 148 num_components_in_best_prefix); | 144 num_components_in_best_prefix); |
| 149 } | 145 } |
| 150 | 146 |
| 151 const float topicality_score = | 147 const float topicality_score = |
| 152 GetTopicalityScore(terms.size(), url, word_starts); | 148 GetTopicalityScore(terms.size(), url, word_starts); |
| 153 const float frecency_score = GetFrecency(now, visits); | 149 const float frecency_score = GetFrecency( |
| 150 now, (bookmark_service && bookmark_service->IsBookmarked(gurl)), visits); |
| 154 raw_score_ = GetFinalRelevancyScore(topicality_score, frecency_score); | 151 raw_score_ = GetFinalRelevancyScore(topicality_score, frecency_score); |
| 155 raw_score_ = | 152 raw_score_ = |
| 156 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; | 153 (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; |
| 157 | 154 |
| 158 if (also_do_hup_like_scoring_ && can_inline_) { | 155 if (also_do_hup_like_scoring_ && can_inline_) { |
| 159 // HistoryURL-provider-like scoring gives any match that is | 156 // HistoryURL-provider-like scoring gives any match that is |
| 160 // capable of being inlined a certain minimum score. Some of these | 157 // capable of being inlined a certain minimum score. Some of these |
| 161 // are given a higher score that lets them be shown in inline. | 158 // are given a higher score that lets them be shown in inline. |
| 162 // This test here derives from the test in | 159 // This test here derives from the test in |
| 163 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). | 160 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). |
| (...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 475 days_ago_to_recency_score_[days_ago] = unnormalized_recency_score / 100.0; | 472 days_ago_to_recency_score_[days_ago] = unnormalized_recency_score / 100.0; |
| 476 if (days_ago > 0) { | 473 if (days_ago > 0) { |
| 477 DCHECK_LE(days_ago_to_recency_score_[days_ago], | 474 DCHECK_LE(days_ago_to_recency_score_[days_ago], |
| 478 days_ago_to_recency_score_[days_ago - 1]); | 475 days_ago_to_recency_score_[days_ago - 1]); |
| 479 } | 476 } |
| 480 } | 477 } |
| 481 } | 478 } |
| 482 | 479 |
| 483 // static | 480 // static |
| 484 float ScoredHistoryMatch::GetFrecency(const base::Time& now, | 481 float ScoredHistoryMatch::GetFrecency(const base::Time& now, |
| 482 const bool bookmarked, |
| 485 const VisitInfoVector& visits) { | 483 const VisitInfoVector& visits) { |
| 486 // Compute the weighted average |value_of_transition| over the last at | 484 // Compute the weighted average |value_of_transition| over the last at |
| 487 // most kMaxVisitsToScore visits, where each visit is weighted using | 485 // most kMaxVisitsToScore visits, where each visit is weighted using |
| 488 // GetRecencyScore() based on how many days ago it happened. Use | 486 // GetRecencyScore() based on how many days ago it happened. Use |
| 489 // kMaxVisitsToScore as the denominator for the average regardless of | 487 // kMaxVisitsToScore as the denominator for the average regardless of |
| 490 // how many visits there were in order to penalize a match that has | 488 // how many visits there were in order to penalize a match that has |
| 491 // fewer visits than kMaxVisitsToScore. | 489 // fewer visits than kMaxVisitsToScore. |
| 492 const int total_sampled_visits = std::min(visits.size(), kMaxVisitsToScore); | 490 const int total_sampled_visits = std::min(visits.size(), kMaxVisitsToScore); |
| 493 if (total_sampled_visits == 0) | 491 if (total_sampled_visits == 0) |
| 494 return 0.0f; | 492 return 0.0f; |
| 495 float summed_visit_points = 0; | 493 float summed_visit_points = 0; |
| 496 for (int i = 0; i < total_sampled_visits; ++i) { | 494 for (int i = 0; i < total_sampled_visits; ++i) { |
| 497 const int value_of_transition = | 495 int value_of_transition = |
| 498 (visits[i].second == content::PAGE_TRANSITION_TYPED) ? 20 : 1; | 496 (visits[i].second == content::PAGE_TRANSITION_TYPED) ? 20 : 1; |
| 497 if (bookmarked) |
| 498 value_of_transition = std::max(value_of_transition, bookmark_value_); |
| 499 const float bucket_weight = | 499 const float bucket_weight = |
| 500 GetRecencyScore((now - visits[i].first).InDays()); | 500 GetRecencyScore((now - visits[i].first).InDays()); |
| 501 summed_visit_points += (value_of_transition * bucket_weight); | 501 summed_visit_points += (value_of_transition * bucket_weight); |
| 502 } | 502 } |
| 503 return visits.size() * summed_visit_points / total_sampled_visits; | 503 return visits.size() * summed_visit_points / total_sampled_visits; |
| 504 } | 504 } |
| 505 | 505 |
| 506 // static | 506 // static |
| 507 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, | 507 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
| 508 float frecency_score) { | 508 float frecency_score) { |
| (...skipping 28 matching lines...) Expand all Loading... |
| 537 } | 537 } |
| 538 // Linearly extrapolate so a score of 20 (or more) has a score of 1399. | 538 // Linearly extrapolate so a score of 20 (or more) has a score of 1399. |
| 539 // (Scores above 20 are possible for URLs that have multiple term hits | 539 // (Scores above 20 are possible for URLs that have multiple term hits |
| 540 // in the URL and/or title and that are visited practically all | 540 // in the URL and/or title and that are visited practically all |
| 541 // the time using typed visits. We don't attempt to distinguish | 541 // the time using typed visits. We don't attempt to distinguish |
| 542 // between these very good results.) | 542 // between these very good results.) |
| 543 const float slope = (1399 - 1300) / (20.0f - 12.0f); | 543 const float slope = (1399 - 1300) / (20.0f - 12.0f); |
| 544 return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0)); | 544 return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0)); |
| 545 } | 545 } |
| 546 | 546 |
| 547 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField() { | 547 void ScoredHistoryMatch::Init() { |
| 548 if (initialized_) |
| 549 return; |
| 548 also_do_hup_like_scoring_ = false; | 550 also_do_hup_like_scoring_ = false; |
| 549 // When doing HUP-like scoring, don't allow a non-inlineable match | 551 // When doing HUP-like scoring, don't allow a non-inlineable match |
| 550 // to beat the score of good inlineable matches. This is a problem | 552 // to beat the score of good inlineable matches. This is a problem |
| 551 // because if a non-inlineable match ends up with the highest score | 553 // because if a non-inlineable match ends up with the highest score |
| 552 // from HistoryQuick provider, all HistoryQuick matches get demoted | 554 // from HistoryQuick provider, all HistoryQuick matches get demoted |
| 553 // to non-inlineable scores (scores less than 1200). Without | 555 // to non-inlineable scores (scores less than 1200). Without |
| 554 // HUP-like-scoring, these results would actually come from the HUP | 556 // HUP-like-scoring, these results would actually come from the HUP |
| 555 // and not be demoted, thus outscoring the demoted HQP results. | 557 // and not be demoted, thus outscoring the demoted HQP results. |
| 556 // When the HQP provides these, we need to clamp the non-inlineable | 558 // When the HQP provides these, we need to clamp the non-inlineable |
| 557 // results to preserve this behavior. | 559 // results to preserve this behavior. |
| 558 if (also_do_hup_like_scoring_) { | 560 if (also_do_hup_like_scoring_) { |
| 559 max_assigned_score_for_non_inlineable_matches_ = | 561 max_assigned_score_for_non_inlineable_matches_ = |
| 560 HistoryURLProvider::kScoreForBestInlineableResult - 1; | 562 HistoryURLProvider::kScoreForBestInlineableResult - 1; |
| 561 } | 563 } |
| 564 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
| 565 initialized_ = true; |
| 562 } | 566 } |
| 563 | 567 |
| 564 } // namespace history | 568 } // namespace history |
| OLD | NEW |