Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(551)

Side by Side Diff: chrome/browser/history/scored_history_match.cc

Issue 11421139: Omnibox: Create Field Trial for HQP to Ignore Mid-Word Matches (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/scored_history_match.h" 5 #include "chrome/browser/history/scored_history_match.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <numeric> 10 #include <numeric>
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 // counts this score, which then gets modified slightly. This number 49 // counts this score, which then gets modified slightly. This number
50 // is derived from the score of 900 + kMaxMatches used in HistoryURL 50 // is derived from the score of 900 + kMaxMatches used in HistoryURL
51 // provider's CalculateRelevance() function for matches that are not 51 // provider's CalculateRelevance() function for matches that are not
52 // worthy of being inline autopleted. 52 // worthy of being inline autopleted.
53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900; 53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900;
54 54
55 // ScoredHistoryMatch ---------------------------------------------------------- 55 // ScoredHistoryMatch ----------------------------------------------------------
56 56
57 bool ScoredHistoryMatch::initialized_ = false; 57 bool ScoredHistoryMatch::initialized_ = false;
58 bool ScoredHistoryMatch::use_new_scoring = false; 58 bool ScoredHistoryMatch::use_new_scoring = false;
59 bool ScoredHistoryMatch::only_count_matches_at_word_boundaries = false;
59 bool ScoredHistoryMatch::also_do_hup_like_scoring = false; 60 bool ScoredHistoryMatch::also_do_hup_like_scoring = false;
60 61
61 ScoredHistoryMatch::ScoredHistoryMatch() 62 ScoredHistoryMatch::ScoredHistoryMatch()
62 : raw_score(0), 63 : raw_score(0),
63 can_inline(false) { 64 can_inline(false) {
64 if (!initialized_) { 65 if (!initialized_) {
65 InitializeNewScoringField(); 66 InitializeNewScoringField();
67 InitializeOnlyCountMatchesAtWordBoundariesField();
66 InitializeAlsoDoHUPLikeScoringField(); 68 InitializeAlsoDoHUPLikeScoringField();
67 initialized_ = true; 69 initialized_ = true;
68 } 70 }
69 } 71 }
70 72
71 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, 73 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
72 const string16& lower_string, 74 const string16& lower_string,
73 const String16Vector& terms, 75 const String16Vector& terms,
74 const RowWordStarts& word_starts, 76 const RowWordStarts& word_starts,
75 const base::Time now, 77 const base::Time now,
76 BookmarkService* bookmark_service) 78 BookmarkService* bookmark_service)
77 : HistoryMatch(row, 0, false, false), 79 : HistoryMatch(row, 0, false, false),
78 raw_score(0), 80 raw_score(0),
79 can_inline(false) { 81 can_inline(false) {
80 if (!initialized_) { 82 if (!initialized_) {
81 InitializeNewScoringField(); 83 InitializeNewScoringField();
84 InitializeOnlyCountMatchesAtWordBoundariesField();
82 InitializeAlsoDoHUPLikeScoringField(); 85 InitializeAlsoDoHUPLikeScoringField();
83 initialized_ = true; 86 initialized_ = true;
84 } 87 }
85 88
86 GURL gurl = row.url(); 89 GURL gurl = row.url();
87 if (!gurl.is_valid()) 90 if (!gurl.is_valid())
88 return; 91 return;
89 92
90 // Figure out where each search term appears in the URL and/or page title 93 // Figure out where each search term appears in the URL and/or page title
91 // so that we can score as well as provide autocomplete highlighting. 94 // so that we can score as well as provide autocomplete highlighting.
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 // the most recent being within a day and the omnibox input term 157 // the most recent being within a day and the omnibox input term
155 // has a single URL hostname hit at a word boundary. Then this 158 // has a single URL hostname hit at a word boundary. Then this
156 // URL will score 1200 ( = 30 * 40.0). 159 // URL will score 1200 ( = 30 * 40.0).
157 raw_score = 40.0 * topicality_score * recency_score * popularity_score; 160 raw_score = 40.0 * topicality_score * recency_score * popularity_score;
158 raw_score = 161 raw_score =
159 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; 162 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max;
160 } else { // "old" scoring 163 } else { // "old" scoring
161 // Get partial scores based on term matching. Note that the score for 164 // Get partial scores based on term matching. Note that the score for
162 // each of the URL and title are adjusted by the fraction of the 165 // each of the URL and title are adjusted by the fraction of the
163 // terms appearing in each. 166 // terms appearing in each.
164 int url_score = ScoreComponentForMatches(url_matches, url.length()) * 167 int url_score =
168 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_,
169 url.length()) *
165 std::min(url_matches.size(), terms.size()) / terms.size(); 170 std::min(url_matches.size(), terms.size()) / terms.size();
166 int title_score = 171 int title_score =
167 ScoreComponentForMatches(title_matches, title.length()) * 172 ScoreComponentForMatches(title_matches, word_starts.title_word_starts_,
173 title.length()) *
168 std::min(title_matches.size(), terms.size()) / terms.size(); 174 std::min(title_matches.size(), terms.size()) / terms.size();
169 // Arbitrarily pick the best. 175 // Arbitrarily pick the best.
170 // TODO(mrossetti): It might make sense that a term which appears in both 176 // TODO(mrossetti): It might make sense that a term which appears in both
171 // the URL and the Title should boost the score a bit. 177 // the URL and the Title should boost the score a bit.
172 int term_score = std::max(url_score, title_score); 178 int term_score = std::max(url_score, title_score);
173 if (term_score == 0) 179 if (term_score == 0)
174 return; 180 return;
175 181
176 // Determine scoring factors for the recency of visit, visit count and typed 182 // Determine scoring factors for the recency of visit, visit count and typed
177 // count attributes of the URLRow. 183 // count attributes of the URLRow.
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
280 286
281 ScoredHistoryMatch::~ScoredHistoryMatch() {} 287 ScoredHistoryMatch::~ScoredHistoryMatch() {}
282 288
283 // std::accumulate helper function to add up TermMatches' lengths as used in 289 // std::accumulate helper function to add up TermMatches' lengths as used in
284 // ScoreComponentForMatches 290 // ScoreComponentForMatches
285 int AccumulateMatchLength(int total, const TermMatch& match) { 291 int AccumulateMatchLength(int total, const TermMatch& match) {
286 return total + match.length; 292 return total + match.length;
287 } 293 }
288 294
289 // static 295 // static
290 int ScoredHistoryMatch::ScoreComponentForMatches(const TermMatches& matches, 296 int ScoredHistoryMatch::ScoreComponentForMatches(
291 size_t max_length) { 297 const TermMatches& provided_matches,
298 const WordStarts& word_starts,
299 size_t max_length) {
300 if (provided_matches.empty())
301 return 0;
302
303 TermMatches matches_at_word_boundaries;
304 if (only_count_matches_at_word_boundaries) {
305 MakeTermMatchesOnlyAtWordBoundaries(
306 provided_matches, word_starts, &matches_at_word_boundaries);
307 }
308 // The actual matches we'll use for matching. This is |provided_matches|
309 // with all the matches not at a word boundary removed (if told to do so).
310 const TermMatches& matches = only_count_matches_at_word_boundaries ?
311 matches_at_word_boundaries : provided_matches;
312
292 if (matches.empty()) 313 if (matches.empty())
293 return 0; 314 return 0;
294 315
295 // Score component for whether the input terms (if more than one) were found 316 // Score component for whether the input terms (if more than one) were found
296 // in the same order in the match. Start with kOrderMaxValue points divided 317 // in the same order in the match. Start with kOrderMaxValue points divided
297 // equally among (number of terms - 1); then discount each of those terms that 318 // equally among (number of terms - 1); then discount each of those terms that
298 // is out-of-order in the match. 319 // is out-of-order in the match.
299 const int kOrderMaxValue = 1000; 320 const int kOrderMaxValue = 1000;
300 int order_value = kOrderMaxValue; 321 int order_value = kOrderMaxValue;
301 if (matches.size() > 1) { 322 if (matches.size() > 1) {
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
339 complete_value * kCompleteRelevance; 360 complete_value * kCompleteRelevance;
340 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); 361 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance);
341 362
342 // Scale the raw score into a single score component in the same manner as 363 // Scale the raw score into a single score component in the same manner as
343 // used in ScoredMatchForURL(). 364 // used in ScoredMatchForURL().
344 const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; 365 const int kTermScoreLevel[] = { 1000, 750, 500, 200 };
345 return ScoreForValue(raw_score, kTermScoreLevel); 366 return ScoreForValue(raw_score, kTermScoreLevel);
346 } 367 }
347 368
348 // static 369 // static
370 void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
Bart N. 2012/11/29 17:32:26 I think this particular method is worth a unit tes
Mark P 2012/11/29 19:36:48 Done.
371 const TermMatches& matches,
372 const WordStarts& word_starts,
373 TermMatches* matches_at_word_boundaries) {
374 matches_at_word_boundaries->clear();
375 // Resize it to an upper-bound estimate of the correct size.
376 matches_at_word_boundaries->reserve(matches.size());
377 std::vector<size_t>::const_iterator next_word_starts = word_starts.begin(),
378 end_word_starts = word_starts.end();
379 for (TermMatches::const_iterator iter = matches.begin();
380 iter != matches.end(); ++iter) {
381 // Advance next_word_starts until it's >= the position of the term
382 // we're considering.
383 while ((next_word_starts != end_word_starts) &&
384 (*next_word_starts < iter->offset)) {
385 ++next_word_starts;
386 }
387 if ((next_word_starts != end_word_starts) &&
388 (*next_word_starts == iter->offset)) {
389 // At word boundary: copy this element into |matches|.
390 matches_at_word_boundaries->push_back(*iter);
391 }
392 }
393 }
394
395 // static
349 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { 396 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) {
350 int i = 0; 397 int i = 0;
351 int rank_count = arraysize(kScoreRank); 398 int rank_count = arraysize(kScoreRank);
352 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? 399 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ?
353 (value > value_ranks[i]) : (value < value_ranks[i]))) 400 (value > value_ranks[i]) : (value < value_ranks[i])))
354 ++i; 401 ++i;
355 if (i >= rank_count) 402 if (i >= rank_count)
356 return 0; 403 return 0;
357 int score = kScoreRank[i]; 404 int score = kScoreRank[i];
358 if (i > 0) { 405 if (i > 0) {
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after
650 } 697 }
651 698
652 // Add a beacon to the logs that'll allow us to identify later what 699 // Add a beacon to the logs that'll allow us to identify later what
653 // new scoring state a user is in. Do this by incrementing a bucket in 700 // new scoring state a user is in. Do this by incrementing a bucket in
654 // a histogram, where the bucket represents the user's new scoring state. 701 // a histogram, where the bucket represents the user's new scoring state.
655 UMA_HISTOGRAM_ENUMERATION( 702 UMA_HISTOGRAM_ENUMERATION(
656 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon", 703 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon",
657 new_scoring_option, NUM_OPTIONS); 704 new_scoring_option, NUM_OPTIONS);
658 } 705 }
659 706
707 void ScoredHistoryMatch::InitializeOnlyCountMatchesAtWordBoundariesField() {
708 only_count_matches_at_word_boundaries =
709 AutocompleteFieldTrial::
710 InHQPOnlyCountMatchesAtWordBoundariesFieldTrial() &&
711 AutocompleteFieldTrial::
712 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup();
713 }
714
660 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() { 715 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() {
661 also_do_hup_like_scoring = 716 also_do_hup_like_scoring =
662 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() && 717 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() &&
663 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); 718 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup();
664 } 719 }
665 720
666 } // namespace history 721 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698