OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/scored_history_match.h" | 5 #include "chrome/browser/history/scored_history_match.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <numeric> | 10 #include <numeric> |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
49 // counts this score, which then gets modified slightly. This number | 49 // counts this score, which then gets modified slightly. This number |
50 // is derived from the score of 900 + kMaxMatches used in HistoryURL | 50 // is derived from the score of 900 + kMaxMatches used in HistoryURL |
51 // provider's CalculateRelevance() function for matches that are not | 51 // provider's CalculateRelevance() function for matches that are not |
52 // worthy of being inline autopleted. | 52 // worthy of being inline autopleted. |
53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900; | 53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900; |
54 | 54 |
55 // ScoredHistoryMatch ---------------------------------------------------------- | 55 // ScoredHistoryMatch ---------------------------------------------------------- |
56 | 56 |
57 bool ScoredHistoryMatch::initialized_ = false; | 57 bool ScoredHistoryMatch::initialized_ = false; |
58 bool ScoredHistoryMatch::use_new_scoring = false; | 58 bool ScoredHistoryMatch::use_new_scoring = false; |
59 bool ScoredHistoryMatch::only_count_matches_at_word_boundaries = false; | |
59 bool ScoredHistoryMatch::also_do_hup_like_scoring = false; | 60 bool ScoredHistoryMatch::also_do_hup_like_scoring = false; |
60 | 61 |
61 ScoredHistoryMatch::ScoredHistoryMatch() | 62 ScoredHistoryMatch::ScoredHistoryMatch() |
62 : raw_score(0), | 63 : raw_score(0), |
63 can_inline(false) { | 64 can_inline(false) { |
64 if (!initialized_) { | 65 if (!initialized_) { |
65 InitializeNewScoringField(); | 66 InitializeNewScoringField(); |
67 InitializeOnlyCountMatchesAtWordBoundariesField(); | |
66 InitializeAlsoDoHUPLikeScoringField(); | 68 InitializeAlsoDoHUPLikeScoringField(); |
67 initialized_ = true; | 69 initialized_ = true; |
68 } | 70 } |
69 } | 71 } |
70 | 72 |
71 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, | 73 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, |
72 const string16& lower_string, | 74 const string16& lower_string, |
73 const String16Vector& terms, | 75 const String16Vector& terms, |
74 const RowWordStarts& word_starts, | 76 const RowWordStarts& word_starts, |
75 const base::Time now, | 77 const base::Time now, |
76 BookmarkService* bookmark_service) | 78 BookmarkService* bookmark_service) |
77 : HistoryMatch(row, 0, false, false), | 79 : HistoryMatch(row, 0, false, false), |
78 raw_score(0), | 80 raw_score(0), |
79 can_inline(false) { | 81 can_inline(false) { |
80 if (!initialized_) { | 82 if (!initialized_) { |
81 InitializeNewScoringField(); | 83 InitializeNewScoringField(); |
84 InitializeOnlyCountMatchesAtWordBoundariesField(); | |
82 InitializeAlsoDoHUPLikeScoringField(); | 85 InitializeAlsoDoHUPLikeScoringField(); |
83 initialized_ = true; | 86 initialized_ = true; |
84 } | 87 } |
85 | 88 |
86 GURL gurl = row.url(); | 89 GURL gurl = row.url(); |
87 if (!gurl.is_valid()) | 90 if (!gurl.is_valid()) |
88 return; | 91 return; |
89 | 92 |
90 // Figure out where each search term appears in the URL and/or page title | 93 // Figure out where each search term appears in the URL and/or page title |
91 // so that we can score as well as provide autocomplete highlighting. | 94 // so that we can score as well as provide autocomplete highlighting. |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
154 // the most recent being within a day and the omnibox input term | 157 // the most recent being within a day and the omnibox input term |
155 // has a single URL hostname hit at a word boundary. Then this | 158 // has a single URL hostname hit at a word boundary. Then this |
156 // URL will score 1200 ( = 30 * 40.0). | 159 // URL will score 1200 ( = 30 * 40.0). |
157 raw_score = 40.0 * topicality_score * recency_score * popularity_score; | 160 raw_score = 40.0 * topicality_score * recency_score * popularity_score; |
158 raw_score = | 161 raw_score = |
159 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; | 162 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; |
160 } else { // "old" scoring | 163 } else { // "old" scoring |
161 // Get partial scores based on term matching. Note that the score for | 164 // Get partial scores based on term matching. Note that the score for |
162 // each of the URL and title are adjusted by the fraction of the | 165 // each of the URL and title are adjusted by the fraction of the |
163 // terms appearing in each. | 166 // terms appearing in each. |
164 int url_score = ScoreComponentForMatches(url_matches, url.length()) * | 167 int url_score = |
168 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_, | |
169 url.length()) * | |
165 std::min(url_matches.size(), terms.size()) / terms.size(); | 170 std::min(url_matches.size(), terms.size()) / terms.size(); |
166 int title_score = | 171 int title_score = |
167 ScoreComponentForMatches(title_matches, title.length()) * | 172 ScoreComponentForMatches(title_matches, word_starts.title_word_starts_, |
173 title.length()) * | |
168 std::min(title_matches.size(), terms.size()) / terms.size(); | 174 std::min(title_matches.size(), terms.size()) / terms.size(); |
169 // Arbitrarily pick the best. | 175 // Arbitrarily pick the best. |
170 // TODO(mrossetti): It might make sense that a term which appears in both | 176 // TODO(mrossetti): It might make sense that a term which appears in both |
171 // the URL and the Title should boost the score a bit. | 177 // the URL and the Title should boost the score a bit. |
172 int term_score = std::max(url_score, title_score); | 178 int term_score = std::max(url_score, title_score); |
173 if (term_score == 0) | 179 if (term_score == 0) |
174 return; | 180 return; |
175 | 181 |
176 // Determine scoring factors for the recency of visit, visit count and typed | 182 // Determine scoring factors for the recency of visit, visit count and typed |
177 // count attributes of the URLRow. | 183 // count attributes of the URLRow. |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
280 | 286 |
281 ScoredHistoryMatch::~ScoredHistoryMatch() {} | 287 ScoredHistoryMatch::~ScoredHistoryMatch() {} |
282 | 288 |
283 // std::accumulate helper function to add up TermMatches' lengths as used in | 289 // std::accumulate helper function to add up TermMatches' lengths as used in |
284 // ScoreComponentForMatches | 290 // ScoreComponentForMatches |
285 int AccumulateMatchLength(int total, const TermMatch& match) { | 291 int AccumulateMatchLength(int total, const TermMatch& match) { |
286 return total + match.length; | 292 return total + match.length; |
287 } | 293 } |
288 | 294 |
289 // static | 295 // static |
290 int ScoredHistoryMatch::ScoreComponentForMatches(const TermMatches& matches, | 296 int ScoredHistoryMatch::ScoreComponentForMatches( |
291 size_t max_length) { | 297 const TermMatches& provided_matches, |
298 const WordStarts& word_starts, | |
299 size_t max_length) { | |
300 if (provided_matches.empty()) | |
301 return 0; | |
302 | |
303 TermMatches matches_at_word_boundaries; | |
304 if (only_count_matches_at_word_boundaries) { | |
305 MakeTermMatchesOnlyAtWordBoundaries( | |
306 provided_matches, word_starts, &matches_at_word_boundaries); | |
307 } | |
308 // The actual matches we'll use for matching. This is |provided_matches| | |
309 // with all the matches not at a word boundary removed (if told to do so). | |
310 const TermMatches& matches = only_count_matches_at_word_boundaries ? | |
311 matches_at_word_boundaries : provided_matches; | |
312 | |
292 if (matches.empty()) | 313 if (matches.empty()) |
293 return 0; | 314 return 0; |
294 | 315 |
295 // Score component for whether the input terms (if more than one) were found | 316 // Score component for whether the input terms (if more than one) were found |
296 // in the same order in the match. Start with kOrderMaxValue points divided | 317 // in the same order in the match. Start with kOrderMaxValue points divided |
297 // equally among (number of terms - 1); then discount each of those terms that | 318 // equally among (number of terms - 1); then discount each of those terms that |
298 // is out-of-order in the match. | 319 // is out-of-order in the match. |
299 const int kOrderMaxValue = 1000; | 320 const int kOrderMaxValue = 1000; |
300 int order_value = kOrderMaxValue; | 321 int order_value = kOrderMaxValue; |
301 if (matches.size() > 1) { | 322 if (matches.size() > 1) { |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
339 complete_value * kCompleteRelevance; | 360 complete_value * kCompleteRelevance; |
340 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); | 361 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); |
341 | 362 |
342 // Scale the raw score into a single score component in the same manner as | 363 // Scale the raw score into a single score component in the same manner as |
343 // used in ScoredMatchForURL(). | 364 // used in ScoredMatchForURL(). |
344 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; | 365 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; |
345 return ScoreForValue(raw_score, kTermScoreLevel); | 366 return ScoreForValue(raw_score, kTermScoreLevel); |
346 } | 367 } |
347 | 368 |
348 // static | 369 // static |
370 void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( | |
Bart N.
2012/11/29 17:32:26
I think this particular method is worth a unit tes
Mark P
2012/11/29 19:36:48
Done.
| |
371 const TermMatches& matches, | |
372 const WordStarts& word_starts, | |
373 TermMatches* matches_at_word_boundaries) { | |
374 matches_at_word_boundaries->clear(); | |
375 // Resize it to an upper-bound estimate of the correct size. | |
376 matches_at_word_boundaries->reserve(matches.size()); | |
377 std::vector<size_t>::const_iterator next_word_starts = word_starts.begin(), | |
378 end_word_starts = word_starts.end(); | |
379 for (TermMatches::const_iterator iter = matches.begin(); | |
380 iter != matches.end(); ++iter) { | |
381 // Advance next_word_starts until it's >= the position of the term | |
382 // we're considering. | |
383 while ((next_word_starts != end_word_starts) && | |
384 (*next_word_starts < iter->offset)) { | |
385 ++next_word_starts; | |
386 } | |
387 if ((next_word_starts != end_word_starts) && | |
388 (*next_word_starts == iter->offset)) { | |
389 // At word boundary: copy this element into |matches|. | |
390 matches_at_word_boundaries->push_back(*iter); | |
391 } | |
392 } | |
393 } | |
394 | |
395 // static | |
349 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { | 396 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { |
350 int i = 0; | 397 int i = 0; |
351 int rank_count = arraysize(kScoreRank); | 398 int rank_count = arraysize(kScoreRank); |
352 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? | 399 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? |
353 (value > value_ranks[i]) : (value < value_ranks[i]))) | 400 (value > value_ranks[i]) : (value < value_ranks[i]))) |
354 ++i; | 401 ++i; |
355 if (i >= rank_count) | 402 if (i >= rank_count) |
356 return 0; | 403 return 0; |
357 int score = kScoreRank[i]; | 404 int score = kScoreRank[i]; |
358 if (i > 0) { | 405 if (i > 0) { |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
650 } | 697 } |
651 | 698 |
652 // Add a beacon to the logs that'll allow us to identify later what | 699 // Add a beacon to the logs that'll allow us to identify later what |
653 // new scoring state a user is in. Do this by incrementing a bucket in | 700 // new scoring state a user is in. Do this by incrementing a bucket in |
654 // a histogram, where the bucket represents the user's new scoring state. | 701 // a histogram, where the bucket represents the user's new scoring state. |
655 UMA_HISTOGRAM_ENUMERATION( | 702 UMA_HISTOGRAM_ENUMERATION( |
656 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon", | 703 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon", |
657 new_scoring_option, NUM_OPTIONS); | 704 new_scoring_option, NUM_OPTIONS); |
658 } | 705 } |
659 | 706 |
707 void ScoredHistoryMatch::InitializeOnlyCountMatchesAtWordBoundariesField() { | |
708 only_count_matches_at_word_boundaries = | |
709 AutocompleteFieldTrial:: | |
710 InHQPOnlyCountMatchesAtWordBoundariesFieldTrial() && | |
711 AutocompleteFieldTrial:: | |
712 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup(); | |
713 } | |
714 | |
660 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { | 715 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { |
661 also_do_hup_like_scoring = | 716 also_do_hup_like_scoring = |
662 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && | 717 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && |
663 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); | 718 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); |
664 } | 719 } |
665 | 720 |
666 } // namespace history | 721 } // namespace history |
OLD | NEW |