Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/scored_history_match.h" | 5 #include "chrome/browser/history/scored_history_match.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <numeric> | 10 #include <numeric> |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 49 // counts this score, which then gets modified slightly. This number | 49 // counts this score, which then gets modified slightly. This number |
| 50 // is derived from the score of 900 + kMaxMatches used in HistoryURL | 50 // is derived from the score of 900 + kMaxMatches used in HistoryURL |
| 51 // provider's CalculateRelevance() function for matches that are not | 51 // provider's CalculateRelevance() function for matches that are not |
| 52 // worthy of being inline autopleted. | 52 // worthy of being inline autopleted. |
| 53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900; | 53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900; |
| 54 | 54 |
| 55 // ScoredHistoryMatch ---------------------------------------------------------- | 55 // ScoredHistoryMatch ---------------------------------------------------------- |
| 56 | 56 |
| 57 bool ScoredHistoryMatch::initialized_ = false; | 57 bool ScoredHistoryMatch::initialized_ = false; |
| 58 bool ScoredHistoryMatch::use_new_scoring = false; | 58 bool ScoredHistoryMatch::use_new_scoring = false; |
| 59 bool ScoredHistoryMatch::only_count_matches_at_word_boundaries = false; | |
| 59 bool ScoredHistoryMatch::also_do_hup_like_scoring = false; | 60 bool ScoredHistoryMatch::also_do_hup_like_scoring = false; |
| 60 | 61 |
| 61 ScoredHistoryMatch::ScoredHistoryMatch() | 62 ScoredHistoryMatch::ScoredHistoryMatch() |
| 62 : raw_score(0), | 63 : raw_score(0), |
| 63 can_inline(false) { | 64 can_inline(false) { |
| 64 if (!initialized_) { | 65 if (!initialized_) { |
| 65 InitializeNewScoringField(); | 66 InitializeNewScoringField(); |
| 67 InitializeOnlyCountMatchesAtWordBoundariesField(); | |
| 66 InitializeAlsoDoHUPLikeScoringField(); | 68 InitializeAlsoDoHUPLikeScoringField(); |
| 67 initialized_ = true; | 69 initialized_ = true; |
| 68 } | 70 } |
| 69 } | 71 } |
| 70 | 72 |
| 71 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, | 73 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, |
| 72 const string16& lower_string, | 74 const string16& lower_string, |
| 73 const String16Vector& terms, | 75 const String16Vector& terms, |
| 74 const RowWordStarts& word_starts, | 76 const RowWordStarts& word_starts, |
| 75 const base::Time now, | 77 const base::Time now, |
| 76 BookmarkService* bookmark_service) | 78 BookmarkService* bookmark_service) |
| 77 : HistoryMatch(row, 0, false, false), | 79 : HistoryMatch(row, 0, false, false), |
| 78 raw_score(0), | 80 raw_score(0), |
| 79 can_inline(false) { | 81 can_inline(false) { |
| 80 if (!initialized_) { | 82 if (!initialized_) { |
| 81 InitializeNewScoringField(); | 83 InitializeNewScoringField(); |
| 84 InitializeOnlyCountMatchesAtWordBoundariesField(); | |
| 82 InitializeAlsoDoHUPLikeScoringField(); | 85 InitializeAlsoDoHUPLikeScoringField(); |
| 83 initialized_ = true; | 86 initialized_ = true; |
| 84 } | 87 } |
| 85 | 88 |
| 86 GURL gurl = row.url(); | 89 GURL gurl = row.url(); |
| 87 if (!gurl.is_valid()) | 90 if (!gurl.is_valid()) |
| 88 return; | 91 return; |
| 89 | 92 |
| 90 // Figure out where each search term appears in the URL and/or page title | 93 // Figure out where each search term appears in the URL and/or page title |
| 91 // so that we can score as well as provide autocomplete highlighting. | 94 // so that we can score as well as provide autocomplete highlighting. |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 154 // the most recent being within a day and the omnibox input term | 157 // the most recent being within a day and the omnibox input term |
| 155 // has a single URL hostname hit at a word boundary. Then this | 158 // has a single URL hostname hit at a word boundary. Then this |
| 156 // URL will score 1200 ( = 30 * 40.0). | 159 // URL will score 1200 ( = 30 * 40.0). |
| 157 raw_score = 40.0 * topicality_score * recency_score * popularity_score; | 160 raw_score = 40.0 * topicality_score * recency_score * popularity_score; |
| 158 raw_score = | 161 raw_score = |
| 159 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; | 162 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; |
| 160 } else { // "old" scoring | 163 } else { // "old" scoring |
| 161 // Get partial scores based on term matching. Note that the score for | 164 // Get partial scores based on term matching. Note that the score for |
| 162 // each of the URL and title are adjusted by the fraction of the | 165 // each of the URL and title are adjusted by the fraction of the |
| 163 // terms appearing in each. | 166 // terms appearing in each. |
| 164 int url_score = ScoreComponentForMatches(url_matches, url.length()) * | 167 int url_score = |
| 168 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_, | |
| 169 url.length()) * | |
| 165 std::min(url_matches.size(), terms.size()) / terms.size(); | 170 std::min(url_matches.size(), terms.size()) / terms.size(); |
| 166 int title_score = | 171 int title_score = |
| 167 ScoreComponentForMatches(title_matches, title.length()) * | 172 ScoreComponentForMatches(title_matches, word_starts.title_word_starts_, |
| 173 title.length()) * | |
| 168 std::min(title_matches.size(), terms.size()) / terms.size(); | 174 std::min(title_matches.size(), terms.size()) / terms.size(); |
| 169 // Arbitrarily pick the best. | 175 // Arbitrarily pick the best. |
| 170 // TODO(mrossetti): It might make sense that a term which appears in both | 176 // TODO(mrossetti): It might make sense that a term which appears in both |
| 171 // the URL and the Title should boost the score a bit. | 177 // the URL and the Title should boost the score a bit. |
| 172 int term_score = std::max(url_score, title_score); | 178 int term_score = std::max(url_score, title_score); |
| 173 if (term_score == 0) | 179 if (term_score == 0) |
| 174 return; | 180 return; |
| 175 | 181 |
| 176 // Determine scoring factors for the recency of visit, visit count and typed | 182 // Determine scoring factors for the recency of visit, visit count and typed |
| 177 // count attributes of the URLRow. | 183 // count attributes of the URLRow. |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 280 | 286 |
| 281 ScoredHistoryMatch::~ScoredHistoryMatch() {} | 287 ScoredHistoryMatch::~ScoredHistoryMatch() {} |
| 282 | 288 |
| 283 // std::accumulate helper function to add up TermMatches' lengths as used in | 289 // std::accumulate helper function to add up TermMatches' lengths as used in |
| 284 // ScoreComponentForMatches | 290 // ScoreComponentForMatches |
| 285 int AccumulateMatchLength(int total, const TermMatch& match) { | 291 int AccumulateMatchLength(int total, const TermMatch& match) { |
| 286 return total + match.length; | 292 return total + match.length; |
| 287 } | 293 } |
| 288 | 294 |
| 289 // static | 295 // static |
| 290 int ScoredHistoryMatch::ScoreComponentForMatches(const TermMatches& matches, | 296 int ScoredHistoryMatch::ScoreComponentForMatches( |
| 291 size_t max_length) { | 297 const TermMatches& provided_matches, |
| 298 const WordStarts& word_starts, | |
| 299 size_t max_length) { | |
| 300 if (provided_matches.empty()) | |
| 301 return 0; | |
| 302 | |
| 303 TermMatches matches_at_word_boundaries; | |
| 304 if (only_count_matches_at_word_boundaries) { | |
| 305 MakeTermMatchesOnlyAtWordBoundaries( | |
| 306 provided_matches, word_starts, &matches_at_word_boundaries); | |
|
Peter Kasting
2012/12/01 01:21:44
Nit: I'd slightly prefer to wrap after arg 2 and i
Mark P
2012/12/01 01:55:32
Done.
| |
| 307 } | |
| 308 // The actual matches we'll use for matching. This is |provided_matches| | |
| 309 // with all the matches not at a word boundary removed (if told to do so). | |
| 310 const TermMatches& matches = only_count_matches_at_word_boundaries ? | |
| 311 matches_at_word_boundaries : provided_matches; | |
| 312 | |
| 292 if (matches.empty()) | 313 if (matches.empty()) |
| 293 return 0; | 314 return 0; |
| 294 | 315 |
| 295 // Score component for whether the input terms (if more than one) were found | 316 // Score component for whether the input terms (if more than one) were found |
| 296 // in the same order in the match. Start with kOrderMaxValue points divided | 317 // in the same order in the match. Start with kOrderMaxValue points divided |
| 297 // equally among (number of terms - 1); then discount each of those terms that | 318 // equally among (number of terms - 1); then discount each of those terms that |
| 298 // is out-of-order in the match. | 319 // is out-of-order in the match. |
| 299 const int kOrderMaxValue = 1000; | 320 const int kOrderMaxValue = 1000; |
| 300 int order_value = kOrderMaxValue; | 321 int order_value = kOrderMaxValue; |
| 301 if (matches.size() > 1) { | 322 if (matches.size() > 1) { |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 339 complete_value * kCompleteRelevance; | 360 complete_value * kCompleteRelevance; |
| 340 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); | 361 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); |
| 341 | 362 |
| 342 // Scale the raw score into a single score component in the same manner as | 363 // Scale the raw score into a single score component in the same manner as |
| 343 // used in ScoredMatchForURL(). | 364 // used in ScoredMatchForURL(). |
| 344 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; | 365 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; |
| 345 return ScoreForValue(raw_score, kTermScoreLevel); | 366 return ScoreForValue(raw_score, kTermScoreLevel); |
| 346 } | 367 } |
| 347 | 368 |
| 348 // static | 369 // static |
| 370 void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( | |
| 371 const TermMatches& matches, | |
| 372 const WordStarts& word_starts, | |
| 373 TermMatches* matches_at_word_boundaries) { | |
| 374 matches_at_word_boundaries->clear(); | |
| 375 // Resize it to an upper-bound estimate of the correct size. | |
| 376 matches_at_word_boundaries->reserve(matches.size()); | |
| 377 std::vector<size_t>::const_iterator next_word_starts = word_starts.begin(); | |
|
Peter Kasting
2012/12/01 01:21:44
Nit: Is there a typedef we should be using here?
Mark P
2012/12/01 01:55:32
Done.
| |
| 378 for (TermMatches::const_iterator iter = matches.begin(); | |
| 379 iter != matches.end(); ++iter) { | |
| 380 // Advance next_word_starts until it's >= the position of the term | |
| 381 // we're considering. | |
| 382 while ((next_word_starts != word_starts.end()) && | |
| 383 (*next_word_starts < iter->offset)) { | |
| 384 ++next_word_starts; | |
| 385 } | |
| 386 if ((next_word_starts != word_starts.end()) && | |
| 387 (*next_word_starts == iter->offset)) { | |
| 388 // At word boundary: copy this element into |matches|. | |
| 389 matches_at_word_boundaries->push_back(*iter); | |
| 390 } | |
| 391 } | |
| 392 } | |
| 393 | |
| 394 // static | |
| 349 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { | 395 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { |
| 350 int i = 0; | 396 int i = 0; |
| 351 int rank_count = arraysize(kScoreRank); | 397 int rank_count = arraysize(kScoreRank); |
| 352 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? | 398 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? |
| 353 (value > value_ranks[i]) : (value < value_ranks[i]))) | 399 (value > value_ranks[i]) : (value < value_ranks[i]))) |
| 354 ++i; | 400 ++i; |
| 355 if (i >= rank_count) | 401 if (i >= rank_count) |
| 356 return 0; | 402 return 0; |
| 357 int score = kScoreRank[i]; | 403 int score = kScoreRank[i]; |
| 358 if (i > 0) { | 404 if (i > 0) { |
| (...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 650 } | 696 } |
| 651 | 697 |
| 652 // Add a beacon to the logs that'll allow us to identify later what | 698 // Add a beacon to the logs that'll allow us to identify later what |
| 653 // new scoring state a user is in. Do this by incrementing a bucket in | 699 // new scoring state a user is in. Do this by incrementing a bucket in |
| 654 // a histogram, where the bucket represents the user's new scoring state. | 700 // a histogram, where the bucket represents the user's new scoring state. |
| 655 UMA_HISTOGRAM_ENUMERATION( | 701 UMA_HISTOGRAM_ENUMERATION( |
| 656 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon", | 702 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon", |
| 657 new_scoring_option, NUM_OPTIONS); | 703 new_scoring_option, NUM_OPTIONS); |
| 658 } | 704 } |
| 659 | 705 |
| 706 void ScoredHistoryMatch::InitializeOnlyCountMatchesAtWordBoundariesField() { | |
| 707 only_count_matches_at_word_boundaries = | |
| 708 AutocompleteFieldTrial:: | |
| 709 InHQPOnlyCountMatchesAtWordBoundariesFieldTrial() && | |
| 710 AutocompleteFieldTrial:: | |
| 711 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup(); | |
| 712 } | |
| 713 | |
| 660 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { | 714 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { |
| 661 also_do_hup_like_scoring = | 715 also_do_hup_like_scoring = |
| 662 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && | 716 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && |
| 663 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); | 717 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); |
| 664 } | 718 } |
| 665 | 719 |
| 666 } // namespace history | 720 } // namespace history |
| OLD | NEW |