components/omnibox/browser/scored_history_match.cc - Issue 2548363010: Omnibox - Refactor |relevance_buckets| to Remove Memory Leak on Exit

Side by Side Diff: components/omnibox/browser/scored_history_match.cc

Issue 2548363010: Omnibox - Refactor |relevance_buckets| to Remove Memory Leak on Exit (Closed)

Patch Set: polish Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« components/omnibox/browser/scored_history_match.h ('K') | « components/omnibox/browser/scored_history_match.h ('k') | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/omnibox/browser/scored_history_match.h"	5 #include "components/omnibox/browser/scored_history_match.h"

6	6

7 #include <math.h>	7 #include <math.h>

8	8

9 #include <algorithm>	9 #include <algorithm>

10 #include <vector>	10 #include <vector>

11	11

12 #include "base/logging.h"	12 #include "base/logging.h"

	13 #include "base/macros.h"

13 #include "base/numerics/safe_conversions.h"	14 #include "base/numerics/safe_conversions.h"

14 #include "base/strings/string_number_conversions.h"	15 #include "base/strings/string_number_conversions.h"

15 #include "base/strings/string_split.h"	16 #include "base/strings/string_split.h"

16 #include "base/strings/string_util.h"	17 #include "base/strings/string_util.h"

17 #include "base/strings/utf_offset_string_conversions.h"	18 #include "base/strings/utf_offset_string_conversions.h"

18 #include "base/strings/utf_string_conversions.h"	19 #include "base/strings/utf_string_conversions.h"

19 #include "components/bookmarks/browser/bookmark_utils.h"	20 #include "components/bookmarks/browser/bookmark_utils.h"

20 #include "components/omnibox/browser/history_url_provider.h"	21 #include "components/omnibox/browser/history_url_provider.h"

21 #include "components/omnibox/browser/omnibox_field_trial.h"	22 #include "components/omnibox/browser/omnibox_field_trial.h"

22 #include "components/omnibox/browser/url_prefix.h"	23 #include "components/omnibox/browser/url_prefix.h"

23	24

24 namespace {	25 namespace {

25	26

26 // The number of days of recency scores to precompute.	27 // The number of days of recency scores to precompute.

27 const int kDaysToPrecomputeRecencyScoresFor = 366;	28 const int kDaysToPrecomputeRecencyScoresFor = 366;

28	29

29 // The number of raw term score buckets use; raw term scores greater this are	30 // The number of raw term score buckets use; raw term scores greater this are

30 // capped at the score of the largest bucket.	31 // capped at the score of the largest bucket.

31 const int kMaxRawTermScore = 30;	32 const int kMaxRawTermScore = 30;

32	33

	34 // Default relevance buckets. See GetFinalRelevancyScore() for more details on

	35 // these numbers.

	36 const char kDefaultRelevanceBucketsString[] =

	37 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399";

	38

33 // Pre-computed information to speed up calculating recency scores.	39 // Pre-computed information to speed up calculating recency scores.

34 // \|days_ago_to_recency_score\| is a simple array mapping how long ago a page was	40 // \|days_ago_to_recency_score\| is a simple array mapping how long ago a page was

35 // visited (in days) to the recency score we should assign it. This allows easy	41 // visited (in days) to the recency score we should assign it. This allows easy

36 // lookups of scores without requiring math. This is initialized by	42 // lookups of scores without requiring math. This is initialized by

37 // InitDaysAgoToRecencyScoreArray called by	43 // InitDaysAgoToRecencyScoreArray called by

38 // ScoredHistoryMatch::Init().	44 // ScoredHistoryMatch::Init().

39 float days_ago_to_recency_score[kDaysToPrecomputeRecencyScoresFor];	45 float days_ago_to_recency_score[kDaysToPrecomputeRecencyScoresFor];

40	46

41 // Pre-computed information to speed up calculating topicality scores.	47 // Pre-computed information to speed up calculating topicality scores.

42 // \|raw_term_score_to_topicality_score\| is a simple array mapping how raw terms	48 // \|raw_term_score_to_topicality_score\| is a simple array mapping how raw terms

(...skipping 63 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
106 // static	112 // static

107 bool ScoredHistoryMatch::also_do_hup_like_scoring_;	113 bool ScoredHistoryMatch::also_do_hup_like_scoring_;

108 float ScoredHistoryMatch::bookmark_value_;	114 float ScoredHistoryMatch::bookmark_value_;

109 float ScoredHistoryMatch::typed_value_;	115 float ScoredHistoryMatch::typed_value_;

110 bool ScoredHistoryMatch::fix_few_visits_bug_;	116 bool ScoredHistoryMatch::fix_few_visits_bug_;

111 bool ScoredHistoryMatch::frequency_uses_sum_;	117 bool ScoredHistoryMatch::frequency_uses_sum_;

112 size_t ScoredHistoryMatch::max_visits_to_score_;	118 size_t ScoredHistoryMatch::max_visits_to_score_;

113 bool ScoredHistoryMatch::allow_tld_matches_;	119 bool ScoredHistoryMatch::allow_tld_matches_;

114 bool ScoredHistoryMatch::allow_scheme_matches_;	120 bool ScoredHistoryMatch::allow_scheme_matches_;

115 size_t ScoredHistoryMatch::num_title_words_to_allow_;	121 size_t ScoredHistoryMatch::num_title_words_to_allow_;

116 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_;	122 float ScoredHistoryMatch::topicality_threshold_;

117	123 ScoredHistoryMatch::ScoreMaxRelevances*

118 // Default topicality threshold. See GetTopicalityScore() for details.	124 ScoredHistoryMatch::relevance_buckets_override_ = nullptr;

119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f;

120

121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details

122 // on these numbers.

123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] =

124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399";

125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>*

126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr;

127	125

128 ScoredHistoryMatch::ScoredHistoryMatch()	126 ScoredHistoryMatch::ScoredHistoryMatch()

129 : ScoredHistoryMatch(history::URLRow(),	127 : ScoredHistoryMatch(history::URLRow(),

130 VisitInfoVector(),	128 VisitInfoVector(),

131 base::string16(),	129 base::string16(),

132 String16Vector(),	130 String16Vector(),

133 WordStarts(),	131 WordStarts(),

134 RowWordStarts(),	132 RowWordStarts(),

135 false,	133 false,

136 base::Time::Max()) {	134 base::Time::Max()) {

(...skipping 117 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
254 likely_can_inline = true;	252 likely_can_inline = true;

255 innermost_match = (best_inlineable_prefix->num_components ==	253 innermost_match = (best_inlineable_prefix->num_components ==

256 best_prefix->num_components);	254 best_prefix->num_components);

257 }	255 }

258 }	256 }

259 }	257 }

260	258

261 const float topicality_score = GetTopicalityScore(	259 const float topicality_score = GetTopicalityScore(

262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);	260 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);

263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);	261 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);

264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore(	262 raw_score = base::saturated_cast<int>(

265 topicality_score, frequency_score, *hqp_relevance_buckets_));	263 GetFinalRelevancyScore(topicality_score, frequency_score));

266	264

267 if (also_do_hup_like_scoring_ && likely_can_inline) {	265 if (also_do_hup_like_scoring_ && likely_can_inline) {

268 // HistoryURL-provider-like scoring gives any match that is	266 // HistoryURL-provider-like scoring gives any match that is

269 // capable of being inlined a certain minimum score. Some of these	267 // capable of being inlined a certain minimum score. Some of these

270 // are given a higher score that lets them be shown in inline.	268 // are given a higher score that lets them be shown in inline.

271 // This test here derives from the test in	269 // This test here derives from the test in

272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().	270 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().

273 const bool promote_to_inline =	271 const bool promote_to_inline =

274 (row.typed_count() > 1) \|\| (IsHostOnly() && (row.typed_count() == 1));	272 (row.typed_count() > 1) \|\| (IsHostOnly() && (row.typed_count() == 1));

275 int hup_like_score =	273 int hup_like_score =

(...skipping 132 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
408 initialized = true;	406 initialized = true;

409 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring();	407 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring();

410 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();	408 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();

411 typed_value_ = OmniboxFieldTrial::HQPTypedValue();	409 typed_value_ = OmniboxFieldTrial::HQPTypedValue();

412 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore();	410 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore();

413 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum();	411 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum();

414 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug();	412 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug();

415 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();	413 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();

416 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();	414 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();

417 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow();	415 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow();

	416 topicality_threshold_ =

	417 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();

418	418

419 InitRawTermScoreToTopicalityScoreArray();	419 InitRawTermScoreToTopicalityScoreArray();

420 InitDaysAgoToRecencyScoreArray();	420 InitDaysAgoToRecencyScoreArray();

421 InitHQPExperimentalParams();

422 }	421 }

423	422

424 float ScoredHistoryMatch::GetTopicalityScore(	423 float ScoredHistoryMatch::GetTopicalityScore(

425 const int num_terms,	424 const int num_terms,

426 const base::string16& url,	425 const base::string16& url,

427 const WordStarts& terms_to_word_starts_offsets,	426 const WordStarts& terms_to_word_starts_offsets,

428 const RowWordStarts& word_starts) {	427 const RowWordStarts& word_starts) {

429 // A vector that accumulates per-term scores. The strongest match--a	428 // A vector that accumulates per-term scores. The strongest match--a

430 // match in the hostname at a word boundary--is worth 10 points.	429 // match in the hostname at a word boundary--is worth 10 points.

431 // Everything else is less. In general, a match that's not at a word	430 // Everything else is less. In general, a match that's not at a word

(...skipping 178 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
610 // Use \|max_visits_to_score_\| as the denominator for the average regardless of	609 // Use \|max_visits_to_score_\| as the denominator for the average regardless of

611 // how many visits there were in order to penalize a match that has	610 // how many visits there were in order to penalize a match that has

612 // fewer visits than kMaxVisitsToScore.	611 // fewer visits than kMaxVisitsToScore.

613 if (fix_few_visits_bug_)	612 if (fix_few_visits_bug_)

614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;	613 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;

615 return visits.size() * summed_visit_points /	614 return visits.size() * summed_visit_points /

616 ScoredHistoryMatch::max_visits_to_score_;	615 ScoredHistoryMatch::max_visits_to_score_;

617 }	616 }

618	617

619 // static	618 // static

620 float ScoredHistoryMatch::GetFinalRelevancyScore(	619 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,

621 float topicality_score,	620 float frequency_score) {

622 float frequency_score,	621 // \|relevance_buckets\| gives a mapping from intemerdiate score to the final

623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) {	622 // relevance score.

624 DCHECK(hqp_relevance_buckets.size() > 0);	623 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets,

625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0);	624 (GetHQPBuckets()));

	625 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_

	626 ? relevance_buckets_override_ : &default_relevance_buckets;
	Peter Kasting 2016/12/10 02:31:15 Nit: Bad indenting (See what git cl format would d Nit: Bad indenting (See what git cl format would do) Mark P 2016/12/11 04:57:22 Done. Show quoted text On 2016/12/10 02:31:15, Peter Kasting wrote: > Nit: Bad indenting (See what git cl format would do) Done.
	627 DCHECK(!relevance_buckets->empty());

	628 DCHECK_EQ(0.0, (*relevance_buckets)[0].first);

626	629

627 if (topicality_score == 0)	630 if (topicality_score == 0)

628 return 0;	631 return 0;

629 // Here's how to interpret intermediate_score: Suppose the omnibox	632 // Here's how to interpret intermediate_score: Suppose the omnibox

630 // has one input term. Suppose we have a URL for which the omnibox	633 // has one input term. Suppose we have a URL for which the omnibox

631 // input term has a single URL hostname hit at a word boundary. (This	634 // input term has a single URL hostname hit at a word boundary. (This

632 // implies topicality_score = 1.0.). Then the intermediate_score for	635 // implies topicality_score = 1.0.). Then the intermediate_score for

633 // this URL will depend entirely on the frequency_score with	636 // this URL will depend entirely on the frequency_score with

634 // this interpretation:	637 // this interpretation:

635 // - a single typed visit more than three months ago, no other visits -> 0.2	638 // - a single typed visit more than three months ago, no other visits -> 0.2

636 // - a visit every three days, no typed visits -> 0.706	639 // - a visit every three days, no typed visits -> 0.706

637 // - a visit every day, no typed visits -> 0.916	640 // - a visit every day, no typed visits -> 0.916

638 // - a single typed visit yesterday, no other visits -> 2.0	641 // - a single typed visit yesterday, no other visits -> 2.0

639 // - a typed visit once a week -> 11.77	642 // - a typed visit once a week -> 11.77

640 // - a typed visit every three days -> 14.12	643 // - a typed visit every three days -> 14.12

641 // - at least ten typed visits today -> 20.0 (maximum score)	644 // - at least ten typed visits today -> 20.0 (maximum score)

642 //	645 //

643 // The below code maps intermediate_score to the range [0, 1399].	646 // The below code maps intermediate_score to the range [0, 1399].

644 // For example:	647 // For example:

645 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"	648 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"

646 // We will linearly interpolate the scores between:	649 // We will linearly interpolate the scores between:

647 // 0 to 1.5 --> 400 to 600	650 // 0 to 1.5 --> 400 to 600

648 // 1.5 to 12.0 --> 600 to 1300	651 // 1.5 to 12.0 --> 600 to 1300

649 // 12.0 to 20.0 --> 1300 to 1399	652 // 12.0 to 20.0 --> 1300 to 1399

650 // >= 20.0 --> 1399	653 // >= 20.0 --> 1399

651 //	654 //

652 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result	655 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result

653 // from HistoryURL provider).	656 // from HistoryURL provider).

654 const float intermediate_score = topicality_score * frequency_score;	657 const float intermediate_score = topicality_score * frequency_score;

655	658

656 // Find the threshold where intermediate score is greater than bucket.	659 // Find the threshold where intermediate score is greater than bucket.

657 size_t i = 1;	660 size_t i = 1;

658 for (; i < hqp_relevance_buckets.size(); ++i) {	661 for (; i < relevance_buckets->size(); ++i) {

659 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i];	662 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i];

660 if (intermediate_score >= hqp_bucket.first) {	663 if (intermediate_score >= bucket.first) {

661 continue;	664 continue;

662 }	665 }

663 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1];	666 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1];

664 const float slope = ((hqp_bucket.second - previous_bucket.second) /	667 const float slope = ((bucket.second - previous_bucket.second) /

665 (hqp_bucket.first - previous_bucket.first));	668 (bucket.first - previous_bucket.first));

666 return (previous_bucket.second +	669 return (previous_bucket.second +

667 (slope * (intermediate_score - previous_bucket.first)));	670 (slope * (intermediate_score - previous_bucket.first)));

668 }	671 }

669 // It will reach this stage when the score is > highest bucket score.	672 // It will reach this stage when the score is > highest bucket score.

670 // Return the highest bucket score.	673 // Return the highest bucket score.

671 return hqp_relevance_buckets[i - 1].second;	674 return (*relevance_buckets)[i - 1].second;

672 }	675 }

673	676

674 // static	677 // static

675 void ScoredHistoryMatch::InitHQPExperimentalParams() {	678 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>

676 // These are default HQP relevance scoring buckets.	679 ScoredHistoryMatch::GetHQPBuckets() {

677 // See GetFinalRelevancyScore() for details.	680 std::string relevance_buckets_str(kDefaultRelevanceBucketsString);
	Peter Kasting 2016/12/10 02:31:15 Nit: Prefer = to () for strings; see https://www.c Nit: Prefer = to () for strings; see https://www.chromium.org/developers/coding-style/cpp-dos-and-donts#TOC-Variab... . Mark P 2016/12/11 04:57:22 Done. Show quoted text On 2016/12/10 02:31:15, Peter Kasting wrote: > Nit: Prefer = to () for strings; see > https://www.chromium.org/developers/coding-style/cpp-dos-and-donts#TOC-Variab... > . Done.
678 std::string hqp_relevance_buckets_str = std::string(	681 std::string experimental_scoring_buckets =

679 hqp_relevance_buckets_str_);	682 OmniboxFieldTrial::HQPExperimentalScoringBuckets();

680	683 if (!experimental_scoring_buckets.empty())

681 // Fetch the experiment params if they are any.	684 relevance_buckets_str = experimental_scoring_buckets;

682 hqp_experimental_scoring_enabled_ =	685 return GetHQPBucketsFromString(relevance_buckets_str);

683 OmniboxFieldTrial::HQPExperimentalScoringEnabled();

684

685 if (hqp_experimental_scoring_enabled_) {

686 // Add the topicality threshold from experiment params.

687 float hqp_experimental_topicality_threhold =

688 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();

689 topicality_threshold_ = hqp_experimental_topicality_threhold;

690

691 // Add the HQP experimental scoring buckets.

692 std::string hqp_experimental_scoring_buckets =

693 OmniboxFieldTrial::HQPExperimentalScoringBuckets();

694 if (!hqp_experimental_scoring_buckets.empty())

695 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets;

696 }

697

698 // Parse the hqp_relevance_buckets_str string once and store them in vector

699 // which is easy to access.

700 hqp_relevance_buckets_ =

701 new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>();

702

703 bool is_valid_bucket_str = GetHQPBucketsFromString(hqp_relevance_buckets_str,

704 hqp_relevance_buckets_);

705 DCHECK(is_valid_bucket_str);

706 }	686 }

707	687

708 // static	688 // static

709 bool ScoredHistoryMatch::GetHQPBucketsFromString(	689 ScoredHistoryMatch::ScoreMaxRelevances

710 const std::string& buckets_str,	690 ScoredHistoryMatch::GetHQPBucketsFromString(const std::string& buckets_str) {

711 std::vector<ScoreMaxRelevance>* hqp_buckets) {

712 DCHECK(hqp_buckets != NULL);

713 DCHECK(!buckets_str.empty());	691 DCHECK(!buckets_str.empty());

714

715 base::StringPairs kv_pairs;	692 base::StringPairs kv_pairs;

716 if (base::SplitStringIntoKeyValuePairs(buckets_str, ':', ',', &kv_pairs)) {	693 if (!base::SplitStringIntoKeyValuePairs(buckets_str, ':', ',', &kv_pairs))

717 for (base::StringPairs::const_iterator it = kv_pairs.begin();	694 return ScoreMaxRelevances();

718 it != kv_pairs.end(); ++it) {	695 ScoreMaxRelevances hqp_buckets;

719 ScoreMaxRelevance bucket;	696 for (base::StringPairs::const_iterator it = kv_pairs.begin();

720 bool is_valid_intermediate_score =	697 it != kv_pairs.end(); ++it) {

721 base::StringToDouble(it->first, &bucket.first);	698 ScoreMaxRelevance bucket;

722 DCHECK(is_valid_intermediate_score);	699 bool is_valid_intermediate_score =

723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);	700 base::StringToDouble(it->first, &bucket.first);

724 DCHECK(is_valid_hqp_score);	701 DCHECK(is_valid_intermediate_score);

725 hqp_buckets->push_back(bucket);	702 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);

726 }	703 DCHECK(is_valid_hqp_score);

727 return true;	704 hqp_buckets.push_back(bucket);

728 }	705 }

729 return false;	706 return hqp_buckets;

730 }	707 }

OLD	NEW