OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/scored_history_match.h" | 5 #include "chrome/browser/history/scored_history_match.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <numeric> | 10 #include <numeric> |
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
140 float bookmark_boost = | 140 float bookmark_boost = |
141 (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0; | 141 (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0; |
142 | 142 |
143 if (use_new_scoring) { | 143 if (use_new_scoring) { |
144 const float topicality_score = GetTopicalityScore( | 144 const float topicality_score = GetTopicalityScore( |
145 terms.size(), url, url_matches, title_matches, word_starts); | 145 terms.size(), url, url_matches, title_matches, word_starts); |
146 const float recency_score = GetRecencyScore( | 146 const float recency_score = GetRecencyScore( |
147 (now - row.last_visit()).InDays()); | 147 (now - row.last_visit()).InDays()); |
148 const float popularity_score = GetPopularityScore( | 148 const float popularity_score = GetPopularityScore( |
149 row.typed_count() + bookmark_boost, row.visit_count()); | 149 row.typed_count() + bookmark_boost, row.visit_count()); |
150 | 150 raw_score = GetFinalRelevancyScore( |
151 // Combine recency, popularity, and topicality scores into one. | 151 topicality_score, recency_score, popularity_score); |
152 // Example of how this functions: Suppose the omnibox has one | |
153 // input term. Suppose we have a URL that has 30 typed visits with | |
154 // the most recent being within a day and the omnibox input term | |
155 // has a single URL hostname hit at a word boundary. Then this | |
156 // URL will score 1200 ( = 30 * 40.0). | |
157 raw_score = 40.0 * topicality_score * recency_score * popularity_score; | |
158 raw_score = | 152 raw_score = |
159 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; | 153 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; |
160 } else { // "old" scoring | 154 } else { // "old" scoring |
161 // Get partial scores based on term matching. Note that the score for | 155 // Get partial scores based on term matching. Note that the score for |
162 // each of the URL and title are adjusted by the fraction of the | 156 // each of the URL and title are adjusted by the fraction of the |
163 // terms appearing in each. | 157 // terms appearing in each. |
164 int url_score = ScoreComponentForMatches(url_matches, url.length()) * | 158 int url_score = ScoreComponentForMatches(url_matches, url.length()) * |
165 std::min(url_matches.size(), terms.size()) / terms.size(); | 159 std::min(url_matches.size(), terms.size()) / terms.size(); |
166 int title_score = | 160 int title_score = |
167 ScoreComponentForMatches(title_matches, title.length()) * | 161 ScoreComponentForMatches(title_matches, title.length()) * |
(...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
478 // Advance next_word_starts until it's >= the position of the term | 472 // Advance next_word_starts until it's >= the position of the term |
479 // we're considering. | 473 // we're considering. |
480 while ((next_word_starts != end_word_starts) && | 474 while ((next_word_starts != end_word_starts) && |
481 (*next_word_starts < iter->offset)) { | 475 (*next_word_starts < iter->offset)) { |
482 ++next_word_starts; | 476 ++next_word_starts; |
483 ++word_num; | 477 ++word_num; |
484 } | 478 } |
485 if (word_num >= 10) break; // only count the first ten words | 479 if (word_num >= 10) break; // only count the first ten words |
486 const bool at_word_boundary = (next_word_starts != end_word_starts) && | 480 const bool at_word_boundary = (next_word_starts != end_word_starts) && |
487 (*next_word_starts == iter->offset); | 481 (*next_word_starts == iter->offset); |
488 term_scores[iter->term_num] += at_word_boundary ? 8 : 2; | 482 term_scores[iter->term_num] += at_word_boundary ? 8 : 0; |
489 } | 483 } |
490 // TODO(mpearson): Restore logic for penalizing out-of-order matches. | 484 // TODO(mpearson): Restore logic for penalizing out-of-order matches. |
491 // (Perhaps discount them by 0.8?) | 485 // (Perhaps discount them by 0.8?) |
492 // TODO(mpearson): Consider: if the earliest match occurs late in the string, | 486 // TODO(mpearson): Consider: if the earliest match occurs late in the string, |
493 // should we discount it? | 487 // should we discount it? |
494 // TODO(mpearson): Consider: do we want to score based on how much of the | 488 // TODO(mpearson): Consider: do we want to score based on how much of the |
495 // input string the input covers? (I'm leaning toward no.) | 489 // input string the input covers? (I'm leaning toward no.) |
496 | 490 |
497 // Compute the topicality_score as the sum of transformed term_scores. | 491 // Compute the topicality_score as the sum of transformed term_scores. |
498 float topicality_score = 0; | 492 float topicality_score = 0; |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
591 } | 585 } |
592 | 586 |
593 // static | 587 // static |
594 float ScoredHistoryMatch::GetPopularityScore(int typed_count, | 588 float ScoredHistoryMatch::GetPopularityScore(int typed_count, |
595 int visit_count) { | 589 int visit_count) { |
596 // The max()s are to guard against database corruption. | 590 // The max()s are to guard against database corruption. |
597 return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) / | 591 return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) / |
598 (5.0 + 3.0); | 592 (5.0 + 3.0); |
599 } | 593 } |
600 | 594 |
| 595 // static |
| 596 float ScoredHistoryMatch::GetFinalRelevancyScore( |
| 597 float topicality_score, float recency_score, float popularity_score) { |
| 598 // Here's how to interpret intermediate_score: Suppose the omnibox |
| 599 // has one input term. Suppose we have a URL that has 5 typed |
| 600 // visits with the most recent being within a day and the omnibox |
| 601 // input term has a single URL hostname hit at a word boundary. |
| 602 // This URL will have an intermediate_score of 5.0 (= 1 topicality * |
| 603 // 1 recency * 5 popularity). |
| 604 float intermediate_score = |
| 605 topicality_score * recency_score * popularity_score; |
| 606 // The below code takes intermediate_score from [0, infinity) to |
| 607 // relevancy scores in the range [0, 1400). |
| 608 float attenuating_factor = 1.0; |
| 609 if (intermediate_score < 4) { |
| 610 // The formula in the final return line in this function only works if |
| 611 // intermediate_score > 4. For lower scores, we linearly interpolate |
| 612 // between 0 and the formula when intermediate_score = 4.0. |
| 613 attenuating_factor = intermediate_score / 4.0; |
| 614 intermediate_score = 4.0; |
| 615 } |
| 616 DCHECK_GE(intermediate_score, 4.0); |
| 617 return attenuating_factor * 1400.0 * (2.0 - exp(2.0 / intermediate_score)); |
| 618 } |
| 619 |
601 void ScoredHistoryMatch::InitializeNewScoringField() { | 620 void ScoredHistoryMatch::InitializeNewScoringField() { |
602 enum NewScoringOption { | 621 enum NewScoringOption { |
603 OLD_SCORING = 0, | 622 OLD_SCORING = 0, |
604 NEW_SCORING = 1, | 623 NEW_SCORING = 1, |
605 NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2, | 624 NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2, |
606 NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3, | 625 NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3, |
607 NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4, | 626 NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4, |
608 NUM_OPTIONS = 5 | 627 NUM_OPTIONS = 5 |
609 }; | 628 }; |
610 // should always be overwritten | 629 // should always be overwritten |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
657 new_scoring_option, NUM_OPTIONS); | 676 new_scoring_option, NUM_OPTIONS); |
658 } | 677 } |
659 | 678 |
660 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { | 679 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { |
661 also_do_hup_like_scoring = | 680 also_do_hup_like_scoring = |
662 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && | 681 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && |
663 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); | 682 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); |
664 } | 683 } |
665 | 684 |
666 } // namespace history | 685 } // namespace history |
OLD | NEW |