Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(26)

Side by Side Diff: components/omnibox/browser/scored_history_match.cc

Issue 2548363010: Omnibox - Refactor |relevance_buckets| to Remove Memory Leak on Exit (Closed)
Patch Set: 0.8 -> 0.8f Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/scored_history_match.h" 5 #include "components/omnibox/browser/scored_history_match.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <vector> 10 #include <vector>
11 11
12 #include "base/logging.h" 12 #include "base/logging.h"
13 #include "base/macros.h"
13 #include "base/numerics/safe_conversions.h" 14 #include "base/numerics/safe_conversions.h"
14 #include "base/strings/string_number_conversions.h" 15 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_split.h" 16 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h" 17 #include "base/strings/string_util.h"
17 #include "base/strings/utf_offset_string_conversions.h" 18 #include "base/strings/utf_offset_string_conversions.h"
18 #include "base/strings/utf_string_conversions.h" 19 #include "base/strings/utf_string_conversions.h"
19 #include "components/bookmarks/browser/bookmark_utils.h" 20 #include "components/bookmarks/browser/bookmark_utils.h"
20 #include "components/omnibox/browser/history_url_provider.h" 21 #include "components/omnibox/browser/history_url_provider.h"
21 #include "components/omnibox/browser/omnibox_field_trial.h" 22 #include "components/omnibox/browser/omnibox_field_trial.h"
22 #include "components/omnibox/browser/url_prefix.h" 23 #include "components/omnibox/browser/url_prefix.h"
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 // static 107 // static
107 bool ScoredHistoryMatch::also_do_hup_like_scoring_; 108 bool ScoredHistoryMatch::also_do_hup_like_scoring_;
108 float ScoredHistoryMatch::bookmark_value_; 109 float ScoredHistoryMatch::bookmark_value_;
109 float ScoredHistoryMatch::typed_value_; 110 float ScoredHistoryMatch::typed_value_;
110 bool ScoredHistoryMatch::fix_few_visits_bug_; 111 bool ScoredHistoryMatch::fix_few_visits_bug_;
111 bool ScoredHistoryMatch::frequency_uses_sum_; 112 bool ScoredHistoryMatch::frequency_uses_sum_;
112 size_t ScoredHistoryMatch::max_visits_to_score_; 113 size_t ScoredHistoryMatch::max_visits_to_score_;
113 bool ScoredHistoryMatch::allow_tld_matches_; 114 bool ScoredHistoryMatch::allow_tld_matches_;
114 bool ScoredHistoryMatch::allow_scheme_matches_; 115 bool ScoredHistoryMatch::allow_scheme_matches_;
115 size_t ScoredHistoryMatch::num_title_words_to_allow_; 116 size_t ScoredHistoryMatch::num_title_words_to_allow_;
116 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_; 117 float ScoredHistoryMatch::topicality_threshold_;
117 118 ScoredHistoryMatch::ScoreMaxRelevances*
118 // Default topicality threshold. See GetTopicalityScore() for details. 119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr;
119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f;
120
121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details
122 // on these numbers.
123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] =
124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399";
125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>*
126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr;
127 120
128 ScoredHistoryMatch::ScoredHistoryMatch() 121 ScoredHistoryMatch::ScoredHistoryMatch()
129 : ScoredHistoryMatch(history::URLRow(), 122 : ScoredHistoryMatch(history::URLRow(),
130 VisitInfoVector(), 123 VisitInfoVector(),
131 base::string16(), 124 base::string16(),
132 String16Vector(), 125 String16Vector(),
133 WordStarts(), 126 WordStarts(),
134 RowWordStarts(), 127 RowWordStarts(),
135 false, 128 false,
136 base::Time::Max()) { 129 base::Time::Max()) {
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
254 likely_can_inline = true; 247 likely_can_inline = true;
255 innermost_match = (best_inlineable_prefix->num_components == 248 innermost_match = (best_inlineable_prefix->num_components ==
256 best_prefix->num_components); 249 best_prefix->num_components);
257 } 250 }
258 } 251 }
259 } 252 }
260 253
261 const float topicality_score = GetTopicalityScore( 254 const float topicality_score = GetTopicalityScore(
262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); 255 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);
263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); 256 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);
264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( 257 raw_score = base::saturated_cast<int>(
265 topicality_score, frequency_score, *hqp_relevance_buckets_)); 258 GetFinalRelevancyScore(topicality_score, frequency_score));
266 259
267 if (also_do_hup_like_scoring_ && likely_can_inline) { 260 if (also_do_hup_like_scoring_ && likely_can_inline) {
268 // HistoryURL-provider-like scoring gives any match that is 261 // HistoryURL-provider-like scoring gives any match that is
269 // capable of being inlined a certain minimum score. Some of these 262 // capable of being inlined a certain minimum score. Some of these
270 // are given a higher score that lets them be shown in inline. 263 // are given a higher score that lets them be shown in inline.
271 // This test here derives from the test in 264 // This test here derives from the test in
272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). 265 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().
273 const bool promote_to_inline = 266 const bool promote_to_inline =
274 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); 267 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1));
275 int hup_like_score = 268 int hup_like_score =
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
408 initialized = true; 401 initialized = true;
409 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); 402 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring();
410 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); 403 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();
411 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); 404 typed_value_ = OmniboxFieldTrial::HQPTypedValue();
412 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); 405 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore();
413 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); 406 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum();
414 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); 407 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug();
415 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); 408 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();
416 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); 409 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();
417 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); 410 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow();
411 topicality_threshold_ =
412 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();
418 413
419 InitRawTermScoreToTopicalityScoreArray(); 414 InitRawTermScoreToTopicalityScoreArray();
420 InitDaysAgoToRecencyScoreArray(); 415 InitDaysAgoToRecencyScoreArray();
421 InitHQPExperimentalParams();
422 } 416 }
423 417
424 float ScoredHistoryMatch::GetTopicalityScore( 418 float ScoredHistoryMatch::GetTopicalityScore(
425 const int num_terms, 419 const int num_terms,
426 const base::string16& url, 420 const base::string16& url,
427 const WordStarts& terms_to_word_starts_offsets, 421 const WordStarts& terms_to_word_starts_offsets,
428 const RowWordStarts& word_starts) { 422 const RowWordStarts& word_starts) {
429 // A vector that accumulates per-term scores. The strongest match--a 423 // A vector that accumulates per-term scores. The strongest match--a
430 // match in the hostname at a word boundary--is worth 10 points. 424 // match in the hostname at a word boundary--is worth 10 points.
431 // Everything else is less. In general, a match that's not at a word 425 // Everything else is less. In general, a match that's not at a word
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
610 // Use |max_visits_to_score_| as the denominator for the average regardless of 604 // Use |max_visits_to_score_| as the denominator for the average regardless of
611 // how many visits there were in order to penalize a match that has 605 // how many visits there were in order to penalize a match that has
612 // fewer visits than kMaxVisitsToScore. 606 // fewer visits than kMaxVisitsToScore.
613 if (fix_few_visits_bug_) 607 if (fix_few_visits_bug_)
614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; 608 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;
615 return visits.size() * summed_visit_points / 609 return visits.size() * summed_visit_points /
616 ScoredHistoryMatch::max_visits_to_score_; 610 ScoredHistoryMatch::max_visits_to_score_;
617 } 611 }
618 612
619 // static 613 // static
620 float ScoredHistoryMatch::GetFinalRelevancyScore( 614 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,
621 float topicality_score, 615 float frequency_score) {
622 float frequency_score, 616 // |relevance_buckets| gives a mapping from intemerdiate score to the final
623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { 617 // relevance score.
624 DCHECK(hqp_relevance_buckets.size() > 0); 618 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets,
625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); 619 (GetHQPBuckets()));
620 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_
621 ? relevance_buckets_override_
622 : &default_relevance_buckets;
623 DCHECK(!relevance_buckets->empty());
624 DCHECK_EQ(0.0, (*relevance_buckets)[0].first);
626 625
627 if (topicality_score == 0) 626 if (topicality_score == 0)
628 return 0; 627 return 0;
629 // Here's how to interpret intermediate_score: Suppose the omnibox 628 // Here's how to interpret intermediate_score: Suppose the omnibox
630 // has one input term. Suppose we have a URL for which the omnibox 629 // has one input term. Suppose we have a URL for which the omnibox
631 // input term has a single URL hostname hit at a word boundary. (This 630 // input term has a single URL hostname hit at a word boundary. (This
632 // implies topicality_score = 1.0.). Then the intermediate_score for 631 // implies topicality_score = 1.0.). Then the intermediate_score for
633 // this URL will depend entirely on the frequency_score with 632 // this URL will depend entirely on the frequency_score with
634 // this interpretation: 633 // this interpretation:
635 // - a single typed visit more than three months ago, no other visits -> 0.2 634 // - a single typed visit more than three months ago, no other visits -> 0.2
636 // - a visit every three days, no typed visits -> 0.706 635 // - a visit every three days, no typed visits -> 0.706
637 // - a visit every day, no typed visits -> 0.916 636 // - a visit every day, no typed visits -> 0.916
638 // - a single typed visit yesterday, no other visits -> 2.0 637 // - a single typed visit yesterday, no other visits -> 2.0
639 // - a typed visit once a week -> 11.77 638 // - a typed visit once a week -> 11.77
640 // - a typed visit every three days -> 14.12 639 // - a typed visit every three days -> 14.12
641 // - at least ten typed visits today -> 20.0 (maximum score) 640 // - at least ten typed visits today -> 20.0 (maximum score)
642 // 641 //
643 // The below code maps intermediate_score to the range [0, 1399]. 642 // The below code maps intermediate_score to the range [0, 1399].
644 // For example: 643 // For example:
645 // HQP default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" 644 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"
646 // We will linearly interpolate the scores between: 645 // We will linearly interpolate the scores between:
647 // 0 to 1.5 --> 400 to 600 646 // 0 to 1.5 --> 400 to 600
648 // 1.5 to 12.0 --> 600 to 1300 647 // 1.5 to 12.0 --> 600 to 1300
649 // 12.0 to 20.0 --> 1300 to 1399 648 // 12.0 to 20.0 --> 1300 to 1399
650 // >= 20.0 --> 1399 649 // >= 20.0 --> 1399
651 // 650 //
652 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result 651 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result
653 // from HistoryURL provider). 652 // from HistoryURL provider).
654 const float intermediate_score = topicality_score * frequency_score; 653 const float intermediate_score = topicality_score * frequency_score;
655 654
656 // Find the threshold where intermediate score is greater than bucket. 655 // Find the threshold where intermediate score is greater than bucket.
657 size_t i = 1; 656 size_t i = 1;
658 for (; i < hqp_relevance_buckets.size(); ++i) { 657 for (; i < relevance_buckets->size(); ++i) {
659 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i]; 658 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i];
660 if (intermediate_score >= hqp_bucket.first) { 659 if (intermediate_score >= bucket.first) {
661 continue; 660 continue;
662 } 661 }
663 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1]; 662 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1];
664 const float slope = ((hqp_bucket.second - previous_bucket.second) / 663 const float slope = ((bucket.second - previous_bucket.second) /
665 (hqp_bucket.first - previous_bucket.first)); 664 (bucket.first - previous_bucket.first));
666 return (previous_bucket.second + 665 return (previous_bucket.second +
667 (slope * (intermediate_score - previous_bucket.first))); 666 (slope * (intermediate_score - previous_bucket.first)));
668 } 667 }
669 // It will reach this stage when the score is > highest bucket score. 668 // It will reach this stage when the score is > highest bucket score.
670 // Return the highest bucket score. 669 // Return the highest bucket score.
671 return hqp_relevance_buckets[i - 1].second; 670 return (*relevance_buckets)[i - 1].second;
672 } 671 }
673 672
674 // static 673 // static
675 void ScoredHistoryMatch::InitHQPExperimentalParams() { 674 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>
676 // These are default HQP relevance scoring buckets. 675 ScoredHistoryMatch::GetHQPBuckets() {
677 // See GetFinalRelevancyScore() for details. 676 // Start with the default buckets and override them if appropriate.
678 std::string hqp_relevance_buckets_str = std::string( 677 std::string relevance_buckets_str =
679 hqp_relevance_buckets_str_); 678 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399";
680 679 std::string experimental_scoring_buckets =
681 // Fetch the experiment params if they are any. 680 OmniboxFieldTrial::HQPExperimentalScoringBuckets();
682 hqp_experimental_scoring_enabled_ = 681 if (!experimental_scoring_buckets.empty())
683 OmniboxFieldTrial::HQPExperimentalScoringEnabled(); 682 relevance_buckets_str = experimental_scoring_buckets;
684 683 return GetHQPBucketsFromString(relevance_buckets_str);
685 if (hqp_experimental_scoring_enabled_) {
686 // Add the topicality threshold from experiment params.
687 float hqp_experimental_topicality_threhold =
688 OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();
689 topicality_threshold_ = hqp_experimental_topicality_threhold;
690
691 // Add the HQP experimental scoring buckets.
692 std::string hqp_experimental_scoring_buckets =
693 OmniboxFieldTrial::HQPExperimentalScoringBuckets();
694 if (!hqp_experimental_scoring_buckets.empty())
695 hqp_relevance_buckets_str = hqp_experimental_scoring_buckets;
696 }
697
698 // Parse the hqp_relevance_buckets_str string once and store them in vector
699 // which is easy to access.
700 hqp_relevance_buckets_ =
701 new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>();
702
703 bool is_valid_bucket_str = GetHQPBucketsFromString(hqp_relevance_buckets_str,
704 hqp_relevance_buckets_);
705 DCHECK(is_valid_bucket_str);
706 } 684 }
707 685
708 // static 686 // static
709 bool ScoredHistoryMatch::GetHQPBucketsFromString( 687 ScoredHistoryMatch::ScoreMaxRelevances
710 const std::string& buckets_str, 688 ScoredHistoryMatch::GetHQPBucketsFromString(const std::string& buckets_str) {
711 std::vector<ScoreMaxRelevance>* hqp_buckets) {
712 DCHECK(hqp_buckets != NULL);
713 DCHECK(!buckets_str.empty()); 689 DCHECK(!buckets_str.empty());
714
715 base::StringPairs kv_pairs; 690 base::StringPairs kv_pairs;
716 if (base::SplitStringIntoKeyValuePairs(buckets_str, ':', ',', &kv_pairs)) { 691 if (!base::SplitStringIntoKeyValuePairs(buckets_str, ':', ',', &kv_pairs))
717 for (base::StringPairs::const_iterator it = kv_pairs.begin(); 692 return ScoreMaxRelevances();
718 it != kv_pairs.end(); ++it) { 693 ScoreMaxRelevances hqp_buckets;
719 ScoreMaxRelevance bucket; 694 for (base::StringPairs::const_iterator it = kv_pairs.begin();
720 bool is_valid_intermediate_score = 695 it != kv_pairs.end(); ++it) {
721 base::StringToDouble(it->first, &bucket.first); 696 ScoreMaxRelevance bucket;
722 DCHECK(is_valid_intermediate_score); 697 bool is_valid_intermediate_score =
723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); 698 base::StringToDouble(it->first, &bucket.first);
724 DCHECK(is_valid_hqp_score); 699 DCHECK(is_valid_intermediate_score);
725 hqp_buckets->push_back(bucket); 700 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);
726 } 701 DCHECK(is_valid_hqp_score);
727 return true; 702 hqp_buckets.push_back(bucket);
728 } 703 }
729 return false; 704 return hqp_buckets;
730 } 705 }
OLDNEW
« no previous file with comments | « components/omnibox/browser/scored_history_match.h ('k') | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698