components/omnibox/browser/scored_history_match.cc - Issue 2548363010: Omnibox - Refactor |relevance_buckets| to Remove Memory Leak on Exit

Side by Side Diff: components/omnibox/browser/scored_history_match.cc

Issue 2548363010: Omnibox - Refactor |relevance_buckets| to Remove Memory Leak on Exit (Closed)

Patch Set: 0.8 -> 0.8f Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/omnibox/browser/scored_history_match.h"	5 #include "components/omnibox/browser/scored_history_match.h"

6	6

7 #include <math.h>	7 #include <math.h>

8	8

9 #include <algorithm>	9 #include <algorithm>

10 #include <vector>	10 #include <vector>

11	11

12 #include "base/logging.h"	12 #include "base/logging.h"

	13 #include "base/macros.h"

13 #include "base/numerics/safe_conversions.h"	14 #include "base/numerics/safe_conversions.h"

14 #include "base/strings/string_number_conversions.h"	15 #include "base/strings/string_number_conversions.h"

15 #include "base/strings/string_split.h"	16 #include "base/strings/string_split.h"

16 #include "base/strings/string_util.h"	17 #include "base/strings/string_util.h"

17 #include "base/strings/utf_offset_string_conversions.h"	18 #include "base/strings/utf_offset_string_conversions.h"

18 #include "base/strings/utf_string_conversions.h"	19 #include "base/strings/utf_string_conversions.h"

19 #include "components/bookmarks/browser/bookmark_utils.h"	20 #include "components/bookmarks/browser/bookmark_utils.h"

20 #include "components/omnibox/browser/history_url_provider.h"	21 #include "components/omnibox/browser/history_url_provider.h"

21 #include "components/omnibox/browser/omnibox_field_trial.h"	22 #include "components/omnibox/browser/omnibox_field_trial.h"

22 #include "components/omnibox/browser/url_prefix.h"	23 #include "components/omnibox/browser/url_prefix.h"

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
106 // static	107 // static

107 bool ScoredHistoryMatch::also_do_hup_like_scoring_;	108 bool ScoredHistoryMatch::also_do_hup_like_scoring_;

108 float ScoredHistoryMatch::bookmark_value_;	109 float ScoredHistoryMatch::bookmark_value_;

109 float ScoredHistoryMatch::typed_value_;	110 float ScoredHistoryMatch::typed_value_;

110 bool ScoredHistoryMatch::fix_few_visits_bug_;	111 bool ScoredHistoryMatch::fix_few_visits_bug_;

111 bool ScoredHistoryMatch::frequency_uses_sum_;	112 bool ScoredHistoryMatch::frequency_uses_sum_;

112 size_t ScoredHistoryMatch::max_visits_to_score_;	113 size_t ScoredHistoryMatch::max_visits_to_score_;

113 bool ScoredHistoryMatch::allow_tld_matches_;	114 bool ScoredHistoryMatch::allow_tld_matches_;

114 bool ScoredHistoryMatch::allow_scheme_matches_;	115 bool ScoredHistoryMatch::allow_scheme_matches_;

115 size_t ScoredHistoryMatch::num_title_words_to_allow_;	116 size_t ScoredHistoryMatch::num_title_words_to_allow_;

116 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_;	117 float ScoredHistoryMatch::topicality_threshold_;

117	118 ScoredHistoryMatch::ScoreMaxRelevances*

118 // Default topicality threshold. See GetTopicalityScore() for details.	119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr;

119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f;

120

121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details

122 // on these numbers.

123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] =

124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399";

125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>*

126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr;

127	120

128 ScoredHistoryMatch::ScoredHistoryMatch()	121 ScoredHistoryMatch::ScoredHistoryMatch()

129 : ScoredHistoryMatch(history::URLRow(),	122 : ScoredHistoryMatch(history::URLRow(),

130 VisitInfoVector(),	123 VisitInfoVector(),

131 base::string16(),	124 base::string16(),

132 String16Vector(),	125 String16Vector(),

133 WordStarts(),	126 WordStarts(),

134 RowWordStarts(),	127 RowWordStarts(),

135 false,	128 false,

136 base::Time::Max()) {	129 base::Time::Max()) {

(...skipping 117 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
254 likely_can_inline = true;	247 likely_can_inline = true;

255 innermost_match = (best_inlineable_prefix->num_components ==	248 innermost_match = (best_inlineable_prefix->num_components ==

256 best_prefix->num_components);	249 best_prefix->num_components);

257 }	250 }

258 }	251 }

259 }	252 }

260	253

261 const float topicality_score = GetTopicalityScore(	254 const float topicality_score = GetTopicalityScore(

262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);	255 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);

263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);	256 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);

264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore(	257 raw_score = base::saturated_cast<int>(

265 topicality_score, frequency_score, *hqp_relevance_buckets_));	258 GetFinalRelevancyScore(topicality_score, frequency_score));

266	259

267 if (also_do_hup_like_scoring_ && likely_can_inline) {	260 if (also_do_hup_like_scoring_ && likely_can_inline) {

268 // HistoryURL-provider-like scoring gives any match that is	261 // HistoryURL-provider-like scoring gives any match that is

269 // capable of being inlined a certain minimum score. Some of these	262 // capable of being inlined a certain minimum score. Some of these

270 // are given a higher score that lets them be shown in inline.	263 // are given a higher score that lets them be shown in inline.

271 // This test here derives from the test in	264 // This test here derives from the test in

272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().	265 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().

273 const bool promote_to_inline =	266 const bool promote_to_inline =

274 (row.typed_count() > 1) \|\| (IsHostOnly() && (row.typed_count() == 1));	267 (row.typed_count() > 1) \|\| (IsHostOnly() && (row.typed_count() == 1));

275 int hup_like_score =	268 int hup_like_score =

(...skipping 132 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
408 initialized = true;	401 initialized = true;

409 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring();	402 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring();

410 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();	403 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();

411 typed_value_ = OmniboxFieldTrial::HQPTypedValue();	404 typed_value_ = OmniboxFieldTrial::HQPTypedValue();

412 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore();	405 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore();

413 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum();	406 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum();

414 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug();	407 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug();

415 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();	408 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();

416 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();	409 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();

417 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow();	410 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow();

	411 topicality_threshold_ =

	412 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();

418	413

419 InitRawTermScoreToTopicalityScoreArray();	414 InitRawTermScoreToTopicalityScoreArray();

420 InitDaysAgoToRecencyScoreArray();	415 InitDaysAgoToRecencyScoreArray();

421 InitHQPExperimentalParams();

422 }	416 }

423	417

424 float ScoredHistoryMatch::GetTopicalityScore(	418 float ScoredHistoryMatch::GetTopicalityScore(

425 const int num_terms,	419 const int num_terms,

426 const base::string16& url,	420 const base::string16& url,

427 const WordStarts& terms_to_word_starts_offsets,	421 const WordStarts& terms_to_word_starts_offsets,

428 const RowWordStarts& word_starts) {	422 const RowWordStarts& word_starts) {

429 // A vector that accumulates per-term scores. The strongest match--a	423 // A vector that accumulates per-term scores. The strongest match--a

430 // match in the hostname at a word boundary--is worth 10 points.	424 // match in the hostname at a word boundary--is worth 10 points.

431 // Everything else is less. In general, a match that's not at a word	425 // Everything else is less. In general, a match that's not at a word

(...skipping 178 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
610 // Use \|max_visits_to_score_\| as the denominator for the average regardless of	604 // Use \|max_visits_to_score_\| as the denominator for the average regardless of

611 // how many visits there were in order to penalize a match that has	605 // how many visits there were in order to penalize a match that has

612 // fewer visits than kMaxVisitsToScore.	606 // fewer visits than kMaxVisitsToScore.

613 if (fix_few_visits_bug_)	607 if (fix_few_visits_bug_)

614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;	608 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;

615 return visits.size() * summed_visit_points /	609 return visits.size() * summed_visit_points /

616 ScoredHistoryMatch::max_visits_to_score_;	610 ScoredHistoryMatch::max_visits_to_score_;

617 }	611 }

618	612

619 // static	613 // static

620 float ScoredHistoryMatch::GetFinalRelevancyScore(	614 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,

621 float topicality_score,	615 float frequency_score) {

622 float frequency_score,	616 // \|relevance_buckets\| gives a mapping from intemerdiate score to the final

623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) {	617 // relevance score.

624 DCHECK(hqp_relevance_buckets.size() > 0);	618 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets,

625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0);	619 (GetHQPBuckets()));

	620 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_

	621 ? relevance_buckets_override_

	622 : &default_relevance_buckets;

	623 DCHECK(!relevance_buckets->empty());

	624 DCHECK_EQ(0.0, (*relevance_buckets)[0].first);

626	625

627 if (topicality_score == 0)	626 if (topicality_score == 0)

628 return 0;	627 return 0;

629 // Here's how to interpret intermediate_score: Suppose the omnibox	628 // Here's how to interpret intermediate_score: Suppose the omnibox

630 // has one input term. Suppose we have a URL for which the omnibox	629 // has one input term. Suppose we have a URL for which the omnibox

631 // input term has a single URL hostname hit at a word boundary. (This	630 // input term has a single URL hostname hit at a word boundary. (This

632 // implies topicality_score = 1.0.). Then the intermediate_score for	631 // implies topicality_score = 1.0.). Then the intermediate_score for

633 // this URL will depend entirely on the frequency_score with	632 // this URL will depend entirely on the frequency_score with

634 // this interpretation:	633 // this interpretation:

635 // - a single typed visit more than three months ago, no other visits -> 0.2	634 // - a single typed visit more than three months ago, no other visits -> 0.2

636 // - a visit every three days, no typed visits -> 0.706	635 // - a visit every three days, no typed visits -> 0.706

637 // - a visit every day, no typed visits -> 0.916	636 // - a visit every day, no typed visits -> 0.916

638 // - a single typed visit yesterday, no other visits -> 2.0	637 // - a single typed visit yesterday, no other visits -> 2.0

639 // - a typed visit once a week -> 11.77	638 // - a typed visit once a week -> 11.77

640 // - a typed visit every three days -> 14.12	639 // - a typed visit every three days -> 14.12

641 // - at least ten typed visits today -> 20.0 (maximum score)	640 // - at least ten typed visits today -> 20.0 (maximum score)

642 //	641 //

643 // The below code maps intermediate_score to the range [0, 1399].	642 // The below code maps intermediate_score to the range [0, 1399].

644 // For example:	643 // For example:

645 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"	644 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"

646 // We will linearly interpolate the scores between:	645 // We will linearly interpolate the scores between:

647 // 0 to 1.5 --> 400 to 600	646 // 0 to 1.5 --> 400 to 600

648 // 1.5 to 12.0 --> 600 to 1300	647 // 1.5 to 12.0 --> 600 to 1300

649 // 12.0 to 20.0 --> 1300 to 1399	648 // 12.0 to 20.0 --> 1300 to 1399

650 // >= 20.0 --> 1399	649 // >= 20.0 --> 1399

651 //	650 //

652 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result	651 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result

653 // from HistoryURL provider).	652 // from HistoryURL provider).

654 const float intermediate_score = topicality_score * frequency_score;	653 const float intermediate_score = topicality_score * frequency_score;

655	654

656 // Find the threshold where intermediate score is greater than bucket.	655 // Find the threshold where intermediate score is greater than bucket.

657 size_t i = 1;	656 size_t i = 1;

658 for (; i < hqp_relevance_buckets.size(); ++i) {	657 for (; i < relevance_buckets->size(); ++i) {

659 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i];	658 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i];

660 if (intermediate_score >= hqp_bucket.first) {	659 if (intermediate_score >= bucket.first) {

661 continue;	660 continue;

662 }	661 }

663 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1];	662 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1];

664 const float slope = ((hqp_bucket.second - previous_bucket.second) /	663 const float slope = ((bucket.second - previous_bucket.second) /

665 (hqp_bucket.first - previous_bucket.first));	664 (bucket.first - previous_bucket.first));

666 return (previous_bucket.second +	665 return (previous_bucket.second +

667 (slope * (intermediate_score - previous_bucket.first)));	666 (slope * (intermediate_score - previous_bucket.first)));

668 }	667 }

669 // It will reach this stage when the score is > highest bucket score.	668 // It will reach this stage when the score is > highest bucket score.

670 // Return the highest bucket score.	669 // Return the highest bucket score.

671 return hqp_relevance_buckets[i - 1].second;	670 return (*relevance_buckets)[i - 1].second;

672 }	671 }

673	672

674 // static	673 // static

675 void ScoredHistoryMatch::InitHQPExperimentalParams() {	674 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>

676 // These are default HQP relevance scoring buckets.	675 ScoredHistoryMatch::GetHQPBuckets() {

677 // See GetFinalRelevancyScore() for details.	676 // Start with the default buckets and override them if appropriate.

678 std::string hqp_relevance_buckets_str = std::string(	677 std::string relevance_buckets_str =

679 hqp_relevance_buckets_str_);	678 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399";

680	679 std::string experimental_scoring_buckets =

681 // Fetch the experiment params if they are any.	680 OmniboxFieldTrial::HQPExperimentalScoringBuckets();

682 hqp_experimental_scoring_enabled_ =	681 if (!experimental_scoring_buckets.empty())

683 OmniboxFieldTrial::HQPExperimentalScoringEnabled();	682 relevance_buckets_str = experimental_scoring_buckets;

684	683 return GetHQPBucketsFromString(relevance_buckets_str);

685 if (hqp_experimental_scoring_enabled_) {

686 // Add the topicality threshold from experiment params.

687 float hqp_experimental_topicality_threhold =

688 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();

689 topicality_threshold_ = hqp_experimental_topicality_threhold;

690

691 // Add the HQP experimental scoring buckets.

692 std::string hqp_experimental_scoring_buckets =

693 OmniboxFieldTrial::HQPExperimentalScoringBuckets();

694 if (!hqp_experimental_scoring_buckets.empty())

695 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets;

696 }

697

698 // Parse the hqp_relevance_buckets_str string once and store them in vector

699 // which is easy to access.

700 hqp_relevance_buckets_ =

701 new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>();

702

703 bool is_valid_bucket_str = GetHQPBucketsFromString(hqp_relevance_buckets_str,

704 hqp_relevance_buckets_);

705 DCHECK(is_valid_bucket_str);

706 }	684 }

707	685

708 // static	686 // static

709 bool ScoredHistoryMatch::GetHQPBucketsFromString(	687 ScoredHistoryMatch::ScoreMaxRelevances

710 const std::string& buckets_str,	688 ScoredHistoryMatch::GetHQPBucketsFromString(const std::string& buckets_str) {

711 std::vector<ScoreMaxRelevance>* hqp_buckets) {

712 DCHECK(hqp_buckets != NULL);

713 DCHECK(!buckets_str.empty());	689 DCHECK(!buckets_str.empty());

714

715 base::StringPairs kv_pairs;	690 base::StringPairs kv_pairs;

716 if (base::SplitStringIntoKeyValuePairs(buckets_str, ':', ',', &kv_pairs)) {	691 if (!base::SplitStringIntoKeyValuePairs(buckets_str, ':', ',', &kv_pairs))

717 for (base::StringPairs::const_iterator it = kv_pairs.begin();	692 return ScoreMaxRelevances();

718 it != kv_pairs.end(); ++it) {	693 ScoreMaxRelevances hqp_buckets;

719 ScoreMaxRelevance bucket;	694 for (base::StringPairs::const_iterator it = kv_pairs.begin();

720 bool is_valid_intermediate_score =	695 it != kv_pairs.end(); ++it) {

721 base::StringToDouble(it->first, &bucket.first);	696 ScoreMaxRelevance bucket;

722 DCHECK(is_valid_intermediate_score);	697 bool is_valid_intermediate_score =

723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);	698 base::StringToDouble(it->first, &bucket.first);

724 DCHECK(is_valid_hqp_score);	699 DCHECK(is_valid_intermediate_score);

725 hqp_buckets->push_back(bucket);	700 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);

726 }	701 DCHECK(is_valid_hqp_score);

727 return true;	702 hqp_buckets.push_back(bucket);

728 }	703 }

729 return false;	704 return hqp_buckets;

730 }	705 }

OLD	NEW

« no previous file with comments | « components/omnibox/browser/scored_history_match.h ('k') | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »