| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/scored_history_match.h" | 5 #include "chrome/browser/history/scored_history_match.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <numeric> | 10 #include <numeric> |
| (...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 143 float bookmark_boost = | 143 float bookmark_boost = |
| 144 (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0; | 144 (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0; |
| 145 | 145 |
| 146 if (use_new_scoring) { | 146 if (use_new_scoring) { |
| 147 const float topicality_score = GetTopicalityScore( | 147 const float topicality_score = GetTopicalityScore( |
| 148 terms.size(), url, url_matches, title_matches, word_starts); | 148 terms.size(), url, url_matches, title_matches, word_starts); |
| 149 const float recency_score = GetRecencyScore( | 149 const float recency_score = GetRecencyScore( |
| 150 (now - row.last_visit()).InDays()); | 150 (now - row.last_visit()).InDays()); |
| 151 const float popularity_score = GetPopularityScore( | 151 const float popularity_score = GetPopularityScore( |
| 152 row.typed_count() + bookmark_boost, row.visit_count()); | 152 row.typed_count() + bookmark_boost, row.visit_count()); |
| 153 | 153 raw_score = GetFinalRelevancyScore( |
| 154 // Combine recency, popularity, and topicality scores into one. | 154 topicality_score, recency_score, popularity_score); |
| 155 // Example of how this functions: Suppose the omnibox has one | |
| 156 // input term. Suppose we have a URL that has 30 typed visits with | |
| 157 // the most recent being within a day and the omnibox input term | |
| 158 // has a single URL hostname hit at a word boundary. Then this | |
| 159 // URL will score 1200 ( = 30 * 40.0). | |
| 160 raw_score = 40.0 * topicality_score * recency_score * popularity_score; | |
| 161 raw_score = | 155 raw_score = |
| 162 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; | 156 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; |
| 163 } else { // "old" scoring | 157 } else { // "old" scoring |
| 164 // Get partial scores based on term matching. Note that the score for | 158 // Get partial scores based on term matching. Note that the score for |
| 165 // each of the URL and title are adjusted by the fraction of the | 159 // each of the URL and title are adjusted by the fraction of the |
| 166 // terms appearing in each. | 160 // terms appearing in each. |
| 167 int url_score = | 161 int url_score = |
| 168 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_, | 162 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_, |
| 169 url.length()) * | 163 url.length()) * |
| 170 std::min(url_matches.size(), terms.size()) / terms.size(); | 164 std::min(url_matches.size(), terms.size()) / terms.size(); |
| (...skipping 353 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 524 // Advance next_word_starts until it's >= the position of the term | 518 // Advance next_word_starts until it's >= the position of the term |
| 525 // we're considering. | 519 // we're considering. |
| 526 while ((next_word_starts != end_word_starts) && | 520 while ((next_word_starts != end_word_starts) && |
| 527 (*next_word_starts < iter->offset)) { | 521 (*next_word_starts < iter->offset)) { |
| 528 ++next_word_starts; | 522 ++next_word_starts; |
| 529 ++word_num; | 523 ++word_num; |
| 530 } | 524 } |
| 531 if (word_num >= 10) break; // only count the first ten words | 525 if (word_num >= 10) break; // only count the first ten words |
| 532 const bool at_word_boundary = (next_word_starts != end_word_starts) && | 526 const bool at_word_boundary = (next_word_starts != end_word_starts) && |
| 533 (*next_word_starts == iter->offset); | 527 (*next_word_starts == iter->offset); |
| 534 term_scores[iter->term_num] += at_word_boundary ? 8 : 2; | 528 term_scores[iter->term_num] += at_word_boundary ? 8 : 0; |
| 535 } | 529 } |
| 536 // TODO(mpearson): Restore logic for penalizing out-of-order matches. | 530 // TODO(mpearson): Restore logic for penalizing out-of-order matches. |
| 537 // (Perhaps discount them by 0.8?) | 531 // (Perhaps discount them by 0.8?) |
| 538 // TODO(mpearson): Consider: if the earliest match occurs late in the string, | 532 // TODO(mpearson): Consider: if the earliest match occurs late in the string, |
| 539 // should we discount it? | 533 // should we discount it? |
| 540 // TODO(mpearson): Consider: do we want to score based on how much of the | 534 // TODO(mpearson): Consider: do we want to score based on how much of the |
| 541 // input string the input covers? (I'm leaning toward no.) | 535 // input string the input covers? (I'm leaning toward no.) |
| 542 | 536 |
| 543 // Compute the topicality_score as the sum of transformed term_scores. | 537 // Compute the topicality_score as the sum of transformed term_scores. |
| 544 float topicality_score = 0; | 538 float topicality_score = 0; |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 637 } | 631 } |
| 638 | 632 |
| 639 // static | 633 // static |
| 640 float ScoredHistoryMatch::GetPopularityScore(int typed_count, | 634 float ScoredHistoryMatch::GetPopularityScore(int typed_count, |
| 641 int visit_count) { | 635 int visit_count) { |
| 642 // Weighted mean of the two counts (typed 5 : visits 3). The max()s clamp negative counts to 0 to guard against database corruption. | 636 // Weighted mean of the two counts (typed 5 : visits 3). The max()s clamp negative counts to 0 to guard against database corruption. |
| 643 return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) / | 637 return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) / |
| 644 (5.0 + 3.0); | 638 (5.0 + 3.0); |
| 645 } | 639 } |
| 646 | 640 |
| 641 // static |
| 642 float ScoredHistoryMatch::GetFinalRelevancyScore( |
| 643 float topicality_score, float recency_score, float popularity_score) { |
| 644 // Combines the topicality, recency, and popularity scores into a single |
| 645 // relevancy score: multiply them, then squash the product (see below). |
| 646 // Example for intermediate_score: with one omnibox input term, take a URL |
| 647 // that has 5 typed visits, the most recent within a day, and a single URL |
| 648 // hostname hit at a word boundary. That URL has an intermediate_score of |
| 649 // 5.0 (= 1 topicality * 1 recency * 5 popularity). |
| 650 float intermediate_score = |
| 651 topicality_score * recency_score * popularity_score; |
| 652 // The code below maps intermediate_score from [0, infinity) to relevancy |
| 653 // scores in [0, 1400); this assumes none of the three inputs is negative. |
| 654 float attenuating_factor = 1.0; |
| 655 if (intermediate_score < 4) { |
| 656 // The formula in the final return line is only applied when |
| 657 // intermediate_score >= 4 (2 - exp(2/x) turns negative below 2/ln(2), about |
| 658 // 2.89). Lower scores are linearly interpolated between 0 and its value at 4.0. |
| 659 attenuating_factor = intermediate_score / 4.0; |
| 660 intermediate_score = 4.0; |
| 661 } |
| 662 DCHECK_GE(intermediate_score, 4.0); |
| 663 return attenuating_factor * 1400.0 * (2.0 - exp(2.0 / intermediate_score)); |
| 664 } |
| 665 |
| 647 void ScoredHistoryMatch::InitializeNewScoringField() { | 666 void ScoredHistoryMatch::InitializeNewScoringField() { |
| 648 enum NewScoringOption { | 667 enum NewScoringOption { |
| 649 OLD_SCORING = 0, | 668 OLD_SCORING = 0, |
| 650 NEW_SCORING = 1, | 669 NEW_SCORING = 1, |
| 651 NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2, | 670 NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2, |
| 652 NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3, | 671 NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3, |
| 653 NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4, | 672 NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4, |
| 654 NUM_OPTIONS = 5 | 673 NUM_OPTIONS = 5 |
| 655 }; | 674 }; |
| 656 // should always be overwritten | 675 // should always be overwritten |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 711 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup(); | 730 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup(); |
| 712 } | 731 } |
| 713 | 732 |
| 714 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { /* Sets also_do_hup_like_scoring to the AND of the two AutocompleteFieldTrial queries below. */ | 733 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { /* Sets also_do_hup_like_scoring to the AND of the two AutocompleteFieldTrial queries below. */ |
| 715 also_do_hup_like_scoring = | 734 also_do_hup_like_scoring = |
| 716 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && | 735 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && |
| 717 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); | 736 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); |
| 718 } | 737 } |
| 719 | 738 |
| 720 } // namespace history | 739 } // namespace history |
| OLD | NEW |