chrome/browser/history/scored_history_match.cc - Issue 11421139: Omnibox: Create Field Trial for HQP to Ignore Mid-Word Matches

Side by Side Diff: chrome/browser/history/scored_history_match.cc

Issue 11421139: Omnibox: Create Field Trial for HQP to Ignore Mid-Word Matches (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/scored_history_match.h"	5 #include "chrome/browser/history/scored_history_match.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <numeric>	10 #include <numeric>

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
49 // counts this score, which then gets modified slightly. This number	49 // counts this score, which then gets modified slightly. This number

50 // is derived from the score of 900 + kMaxMatches used in HistoryURL	50 // is derived from the score of 900 + kMaxMatches used in HistoryURL

51 // provider's CalculateRelevance() function for matches that are not	51 // provider's CalculateRelevance() function for matches that are not

52 // worthy of being inline autopleted.	52 // worthy of being inline autopleted.

53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900;	53 const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900;

54	54

55 // ScoredHistoryMatch ----------------------------------------------------------	55 // ScoredHistoryMatch ----------------------------------------------------------

56	56

57 bool ScoredHistoryMatch::initialized_ = false;	57 bool ScoredHistoryMatch::initialized_ = false;

58 bool ScoredHistoryMatch::use_new_scoring = false;	58 bool ScoredHistoryMatch::use_new_scoring = false;

	59 bool ScoredHistoryMatch::only_count_matches_at_word_boundaries = false;

59 bool ScoredHistoryMatch::also_do_hup_like_scoring = false;	60 bool ScoredHistoryMatch::also_do_hup_like_scoring = false;

60	61

61 ScoredHistoryMatch::ScoredHistoryMatch()	62 ScoredHistoryMatch::ScoredHistoryMatch()

62 : raw_score(0),	63 : raw_score(0),

63 can_inline(false) {	64 can_inline(false) {

64 if (!initialized_) {	65 if (!initialized_) {

65 InitializeNewScoringField();	66 InitializeNewScoringField();

	67 InitializeOnlyCountMatchesAtWordBoundariesField();

66 InitializeAlsoDoHUPLikeScoringField();	68 InitializeAlsoDoHUPLikeScoringField();

67 initialized_ = true;	69 initialized_ = true;

68 }	70 }

69 }	71 }

70	72

71 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,	73 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,

72 const string16& lower_string,	74 const string16& lower_string,

73 const String16Vector& terms,	75 const String16Vector& terms,

74 const RowWordStarts& word_starts,	76 const RowWordStarts& word_starts,

75 const base::Time now,	77 const base::Time now,

76 BookmarkService* bookmark_service)	78 BookmarkService* bookmark_service)

77 : HistoryMatch(row, 0, false, false),	79 : HistoryMatch(row, 0, false, false),

78 raw_score(0),	80 raw_score(0),

79 can_inline(false) {	81 can_inline(false) {

80 if (!initialized_) {	82 if (!initialized_) {

81 InitializeNewScoringField();	83 InitializeNewScoringField();

	84 InitializeOnlyCountMatchesAtWordBoundariesField();

82 InitializeAlsoDoHUPLikeScoringField();	85 InitializeAlsoDoHUPLikeScoringField();

83 initialized_ = true;	86 initialized_ = true;

84 }	87 }

85	88

86 GURL gurl = row.url();	89 GURL gurl = row.url();

87 if (!gurl.is_valid())	90 if (!gurl.is_valid())

88 return;	91 return;

89	92

90 // Figure out where each search term appears in the URL and/or page title	93 // Figure out where each search term appears in the URL and/or page title

91 // so that we can score as well as provide autocomplete highlighting.	94 // so that we can score as well as provide autocomplete highlighting.

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
154 // the most recent being within a day and the omnibox input term	157 // the most recent being within a day and the omnibox input term

155 // has a single URL hostname hit at a word boundary. Then this	158 // has a single URL hostname hit at a word boundary. Then this

156 // URL will score 1200 ( = 30 * 40.0).	159 // URL will score 1200 ( = 30 * 40.0).

157 raw_score = 40.0 * topicality_score * recency_score * popularity_score;	160 raw_score = 40.0 * topicality_score * recency_score * popularity_score;

158 raw_score =	161 raw_score =

159 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max;	162 (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max;

160 } else { // "old" scoring	163 } else { // "old" scoring

161 // Get partial scores based on term matching. Note that the score for	164 // Get partial scores based on term matching. Note that the score for

162 // each of the URL and title are adjusted by the fraction of the	165 // each of the URL and title are adjusted by the fraction of the

163 // terms appearing in each.	166 // terms appearing in each.

164 int url_score = ScoreComponentForMatches(url_matches, url.length()) *	167 int url_score =

	168 ScoreComponentForMatches(url_matches, word_starts.url_word_starts_,

	169 url.length()) *

165 std::min(url_matches.size(), terms.size()) / terms.size();	170 std::min(url_matches.size(), terms.size()) / terms.size();

166 int title_score =	171 int title_score =

167 ScoreComponentForMatches(title_matches, title.length()) *	172 ScoreComponentForMatches(title_matches, word_starts.title_word_starts_,

	173 title.length()) *

168 std::min(title_matches.size(), terms.size()) / terms.size();	174 std::min(title_matches.size(), terms.size()) / terms.size();

169 // Arbitrarily pick the best.	175 // Arbitrarily pick the best.

170 // TODO(mrossetti): It might make sense that a term which appears in both	176 // TODO(mrossetti): It might make sense that a term which appears in both

171 // the URL and the Title should boost the score a bit.	177 // the URL and the Title should boost the score a bit.

172 int term_score = std::max(url_score, title_score);	178 int term_score = std::max(url_score, title_score);

173 if (term_score == 0)	179 if (term_score == 0)

174 return;	180 return;

175	181

176 // Determine scoring factors for the recency of visit, visit count and typed	182 // Determine scoring factors for the recency of visit, visit count and typed

177 // count attributes of the URLRow.	183 // count attributes of the URLRow.

(...skipping 102 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
280	286

281 ScoredHistoryMatch::~ScoredHistoryMatch() {}	287 ScoredHistoryMatch::~ScoredHistoryMatch() {}

282	288

283 // std::accumulate helper function to add up TermMatches' lengths as used in	289 // std::accumulate helper function to add up TermMatches' lengths as used in

284 // ScoreComponentForMatches	290 // ScoreComponentForMatches

285 int AccumulateMatchLength(int total, const TermMatch& match) {	291 int AccumulateMatchLength(int total, const TermMatch& match) {

286 return total + match.length;	292 return total + match.length;

287 }	293 }

288	294

289 // static	295 // static

290 int ScoredHistoryMatch::ScoreComponentForMatches(const TermMatches& matches,	296 int ScoredHistoryMatch::ScoreComponentForMatches(

291 size_t max_length) {	297 const TermMatches& provided_matches,

	298 const WordStarts& word_starts,

	299 size_t max_length) {

	300 if (provided_matches.empty())

	301 return 0;

	302

	303 TermMatches matches_at_word_boundaries;

	304 if (only_count_matches_at_word_boundaries) {

	305 MakeTermMatchesOnlyAtWordBoundaries(

	306 provided_matches, word_starts, &matches_at_word_boundaries);

	307 }

	308 // The actual matches we'll use for matching. This is \|provided_matches\|

	309 // with all the matches not at a word boundary removed (if told to do so).

	310 const TermMatches& matches = only_count_matches_at_word_boundaries ?

	311 matches_at_word_boundaries : provided_matches;

	312

292 if (matches.empty())	313 if (matches.empty())

293 return 0;	314 return 0;

294	315

295 // Score component for whether the input terms (if more than one) were found	316 // Score component for whether the input terms (if more than one) were found

296 // in the same order in the match. Start with kOrderMaxValue points divided	317 // in the same order in the match. Start with kOrderMaxValue points divided

297 // equally among (number of terms - 1); then discount each of those terms that	318 // equally among (number of terms - 1); then discount each of those terms that

298 // is out-of-order in the match.	319 // is out-of-order in the match.

299 const int kOrderMaxValue = 1000;	320 const int kOrderMaxValue = 1000;

300 int order_value = kOrderMaxValue;	321 int order_value = kOrderMaxValue;

301 if (matches.size() > 1) {	322 if (matches.size() > 1) {

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
339 complete_value * kCompleteRelevance;	360 complete_value * kCompleteRelevance;

340 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance);	361 raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance);

341	362

342 // Scale the raw score into a single score component in the same manner as	363 // Scale the raw score into a single score component in the same manner as

343 // used in ScoredMatchForURL().	364 // used in ScoredMatchForURL().

344 const int kTermScoreLevel[] = { 1000, 750, 500, 200 };	365 const int kTermScoreLevel[] = { 1000, 750, 500, 200 };

345 return ScoreForValue(raw_score, kTermScoreLevel);	366 return ScoreForValue(raw_score, kTermScoreLevel);

346 }	367 }

347	368

348 // static	369 // static

	370 void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
	Bart N. 2012/11/29 17:32:26 I think this particular method is worth a unit tes I think this particular method is worth a unit test, as it's non trivial. Mark P 2012/11/29 19:36:48 Done. Show quoted text On 2012/11/29 17:32:26, Bart N. wrote: > I think this particular method is worth a unit test, as it's non trivial. Done.
	371 const TermMatches& matches,

	372 const WordStarts& word_starts,

	373 TermMatches* matches_at_word_boundaries) {

	374 matches_at_word_boundaries->clear();

	375 // Resize it to an upper-bound estimate of the correct size.

	376 matches_at_word_boundaries->reserve(matches.size());

	377 std::vector<size_t>::const_iterator next_word_starts = word_starts.begin(),

	378 end_word_starts = word_starts.end();

	379 for (TermMatches::const_iterator iter = matches.begin();

	380 iter != matches.end(); ++iter) {

	381 // Advance next_word_starts until it's >= the position of the term

	382 // we're considering.

	383 while ((next_word_starts != end_word_starts) &&

	384 (*next_word_starts < iter->offset)) {

	385 ++next_word_starts;

	386 }

	387 if ((next_word_starts != end_word_starts) &&

	388 (*next_word_starts == iter->offset)) {

	389 // At word boundary: copy this element into \|matches\|.

	390 matches_at_word_boundaries->push_back(*iter);

	391 }

	392 }

	393 }

	394

	395 // static

349 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) {	396 int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) {

350 int i = 0;	397 int i = 0;

351 int rank_count = arraysize(kScoreRank);	398 int rank_count = arraysize(kScoreRank);

352 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ?	399 while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ?

353 (value > value_ranks[i]) : (value < value_ranks[i])))	400 (value > value_ranks[i]) : (value < value_ranks[i])))

354 ++i;	401 ++i;

355 if (i >= rank_count)	402 if (i >= rank_count)

356 return 0;	403 return 0;

357 int score = kScoreRank[i];	404 int score = kScoreRank[i];

358 if (i > 0) {	405 if (i > 0) {

(...skipping 291 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
650 }	697 }

651	698

652 // Add a beacon to the logs that'll allow us to identify later what	699 // Add a beacon to the logs that'll allow us to identify later what

653 // new scoring state a user is in. Do this by incrementing a bucket in	700 // new scoring state a user is in. Do this by incrementing a bucket in

654 // a histogram, where the bucket represents the user's new scoring state.	701 // a histogram, where the bucket represents the user's new scoring state.

655 UMA_HISTOGRAM_ENUMERATION(	702 UMA_HISTOGRAM_ENUMERATION(

656 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon",	703 "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon",

657 new_scoring_option, NUM_OPTIONS);	704 new_scoring_option, NUM_OPTIONS);

658 }	705 }

659	706

	707 void ScoredHistoryMatch::InitializeOnlyCountMatchesAtWordBoundariesField() {

	708 only_count_matches_at_word_boundaries =

	709 AutocompleteFieldTrial::

	710 InHQPOnlyCountMatchesAtWordBoundariesFieldTrial() &&

	711 AutocompleteFieldTrial::

	712 InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup();

	713 }

	714

660 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() {	715 void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() {

661 also_do_hup_like_scoring =	716 also_do_hup_like_scoring =

662 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() &&	717 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() &&

663 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup();	718 AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup();

664 }	719 }

665	720

666 } // namespace history	721 } // namespace history

OLD	NEW

« chrome/browser/history/scored_history_match.h ('K') | « chrome/browser/history/scored_history_match.h ('k') | no next file » | no next file with comments »