| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ |
| 6 #define CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ | 6 #define CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ |
| 7 | 7 |
| 8 #include <map> | 8 #include <map> |
| 9 #include <set> | 9 #include <set> |
| 10 #include <vector> | 10 #include <vector> |
| (...skipping 20 matching lines...) Expand all Loading... |
| 31 // If the row does not qualify the raw score will be 0. |bookmark_service| is | 31 // If the row does not qualify the raw score will be 0. |bookmark_service| is |
| 32 // used to determine if the match's URL is referenced by any bookmarks. | 32 // used to determine if the match's URL is referenced by any bookmarks. |
| 33 ScoredHistoryMatch(const URLRow& row, | 33 ScoredHistoryMatch(const URLRow& row, |
| 34 const string16& lower_string, | 34 const string16& lower_string, |
| 35 const String16Vector& terms_vector, | 35 const String16Vector& terms_vector, |
| 36 const RowWordStarts& word_starts, | 36 const RowWordStarts& word_starts, |
| 37 const base::Time now, | 37 const base::Time now, |
| 38 BookmarkService* bookmark_service); | 38 BookmarkService* bookmark_service); |
| 39 ~ScoredHistoryMatch(); | 39 ~ScoredHistoryMatch(); |
| 40 | 40 |
| 41 // Calculates a component score based on position, ordering and total | 41 // Calculates a component score based on position, ordering, word |
| 42 // substring match size using metrics recorded in |matches|. |max_length| | 42 // boundaries, and total substring match size using metrics recorded |
| 43 // is the length of the string against which the terms are being searched. | 43 // in |matches| and |word_starts|. |max_length| is the length of |
| 44 static int ScoreComponentForMatches(const TermMatches& matches, | 44 // the string against which the terms are being searched. |
| 45 // |provided_matches| should already be sorted and de-duped, and |
| 46 // |word_starts| must be sorted. |
| 47 static int ScoreComponentForMatches(const TermMatches& provided_matches, |
| 48 const WordStarts& word_starts, |
| 45 size_t max_length); | 49 size_t max_length); |
| 46 | 50 |
| 51 // Given a set of term matches |provided_matches| and word boundaries |
| 52 // |word_starts|, fills in |matches_at_word_boundaries| with only the |
| 53 // matches in |provided_matches| that are at word boundaries. |
| 54 // |provided_matches| should already be sorted and de-duped, and |
| 55 // |word_starts| must be sorted. |
| 56 static void MakeTermMatchesOnlyAtWordBoundaries( |
| 57 const TermMatches& provided_matches, |
| 58 const WordStarts& word_starts, |
| 59 TermMatches* matches_at_word_boundaries); |
| 60 |
| 47 // Converts a raw value for some particular scoring factor into a score | 61 // Converts a raw value for some particular scoring factor into a score |
| 48 // component for that factor. The conversion function is piecewise linear, | 62 // component for that factor. The conversion function is piecewise linear, |
| 49 // with input values provided in |value_ranks| and resulting output scores | 63 // with input values provided in |value_ranks| and resulting output scores |
| 50 // from |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A | 64 // from |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A |
| 51 // score cannot be higher than kScoreRank[0], and drops directly to 0 if | 65 // score cannot be higher than kScoreRank[0], and drops directly to 0 if |
| 52 // lower than kScoreRank[3]. | 66 // lower than kScoreRank[3]. |
| 53 // | 67 // |
| 54 // For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }. | 68 // For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }. |
| 55 // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by | 69 // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by |
| 56 // the linear function: | 70 // the linear function: |
| (...skipping 10 matching lines...) Expand all Loading... |
| 67 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, | 81 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, |
| 68 const ScoredHistoryMatch& m2); | 82 const ScoredHistoryMatch& m2); |
| 69 | 83 |
| 70 // Start of functions used only in "new" scoring ------------------------ | 84 // Start of functions used only in "new" scoring ------------------------ |
| 71 | 85 |
| 72 // Return a topicality score based on how many matches appear in the | 86 // Return a topicality score based on how many matches appear in the |
| 73 // |url| and the page's title and where they are (e.g., at word | 87 // |url| and the page's title and where they are (e.g., at word |
| 74 // boundaries). |url_matches| and |title_matches| provide details | 88 // boundaries). |url_matches| and |title_matches| provide details |
| 75 // about where the matches in the URL and title are and what terms | 89 // about where the matches in the URL and title are and what terms |
| 76 // (identified by a term number < |num_terms|) match where. | 90 // (identified by a term number < |num_terms|) match where. |
| 77 // |word_starts| explains where word boundaries are. | 91 // |word_starts| explains where word boundaries are. Its parts (title |
| 92 // and url) must be sorted. Also, |url_matches| and |
| 93 // |title_matches| should already be sorted and de-duped. |
| 78 static float GetTopicalityScore(const int num_terms, | 94 static float GetTopicalityScore(const int num_terms, |
| 79 const string16& url, | 95 const string16& url, |
| 80 const TermMatches& url_matches, | 96 const TermMatches& url_matches, |
| 81 const TermMatches& title_matches, | 97 const TermMatches& title_matches, |
| 82 const RowWordStarts& word_starts); | 98 const RowWordStarts& word_starts); |
| 83 | 99 |
| 84 // Precalculates raw_term_score_to_topicality_score, used in | 100 // Precalculates raw_term_score_to_topicality_score, used in |
| 85 // GetTopicalityScore(). | 101 // GetTopicalityScore(). |
| 86 static void FillInTermScoreToTopicalityScoreArray(); | 102 static void FillInTermScoreToTopicalityScoreArray(); |
| 87 | 103 |
| 88 // Returns a recency score based on |last_visit_days_ago|, which is | 104 // Returns a recency score based on |last_visit_days_ago|, which is |
| 89 // how many days ago the page was last visited. | 105 // how many days ago the page was last visited. |
| 90 static float GetRecencyScore(int last_visit_days_ago); | 106 static float GetRecencyScore(int last_visit_days_ago); |
| 91 | 107 |
| 92 // Pre-calculates days_ago_to_recency_numerator_, used in | 108 // Pre-calculates days_ago_to_recency_numerator_, used in |
| 93 // GetRecencyScore(). | 109 // GetRecencyScore(). |
| 94 static void FillInDaysAgoToRecencyScoreArray(); | 110 static void FillInDaysAgoToRecencyScoreArray(); |
| 95 | 111 |
| 96 // Returns a popularity score based on |typed_count| and | 112 // Returns a popularity score based on |typed_count| and |
| 97 // |visit_count|. | 113 // |visit_count|. |
| 98 static float GetPopularityScore(int typed_count, | 114 static float GetPopularityScore(int typed_count, |
| 99 int visit_count); | 115 int visit_count); |
| 100 | 116 |
| 101 // Sets use_new_scoring based on command line flags and/or | 117 // Sets use_new_scoring based on command line flags and/or |
| 102 // field trial state. | 118 // field trial state. |
| 103 static void InitializeNewScoringField(); | 119 static void InitializeNewScoringField(); |
| 104 | 120 |
| 121 // Sets only_count_matches_at_word_boundaries based on the field trial state. |
| 122 static void InitializeOnlyCountMatchesAtWordBoundariesField(); |
| 123 |
| 105 // Sets also_do_hup_like_scoring based on the field trial state. | 124 // Sets also_do_hup_like_scoring based on the field trial state. |
| 106 static void InitializeAlsoDoHUPLikeScoringField(); | 125 static void InitializeAlsoDoHUPLikeScoringField(); |
| 107 | 126 |
| 108 // End of functions used only in "new" scoring -------------------------- | 127 // End of functions used only in "new" scoring -------------------------- |
| 109 | 128 |
| 110 // An interim score taking into consideration location and completeness | 129 // An interim score taking into consideration location and completeness |
| 111 // of the match. | 130 // of the match. |
| 112 int raw_score; | 131 int raw_score; |
| 113 TermMatches url_matches; // Term matches within the URL. | 132 TermMatches url_matches; // Term matches within the URL. |
| 114 TermMatches title_matches; // Term matches within the page title. | 133 TermMatches title_matches; // Term matches within the page title. |
| (...skipping 12 matching lines...) Expand all Loading... |
| 127 // scores. |raw_term_score_to_topicality_score| is a simple array | 146 // scores. |raw_term_score_to_topicality_score| is a simple array |
| 128 // mapping raw term scores (a weighted sum of the number of | 147 // mapping raw term scores (a weighted sum of the number of |
| 129 // hits for the term, weighted by how important the hit is: | 148 // hits for the term, weighted by how important the hit is: |
| 130 // hostname, path, etc.) to the topicality score we should assign | 149 // hostname, path, etc.) to the topicality score we should assign |
| 131 // it. This allows easy lookups of scores without requiring math. | 150 // it. This allows easy lookups of scores without requiring math. |
| 132 // This is initialized upon first use of GetTopicalityScore(), | 151 // This is initialized upon first use of GetTopicalityScore(), |
| 133 // which calls FillInTermScoreToTopicalityScoreArray(). | 152 // which calls FillInTermScoreToTopicalityScoreArray(). |
| 134 static const int kMaxRawTermScore = 30; | 153 static const int kMaxRawTermScore = 30; |
| 135 static float* raw_term_score_to_topicality_score; | 154 static float* raw_term_score_to_topicality_score; |
| 136 | 155 |
| 137 // Allows us to determing setting for use_new_scoring_ only once. | 156 // Used so we initialize static variables only once (on first use). |
| 138 static bool initialized_; | 157 static bool initialized_; |
| 139 | 158 |
| 140 // Whether to use new-scoring or old-scoring. Set in the | 159 // Whether to use new-scoring or old-scoring. Set in the |
| 141 // constructor by examining command line flags and field trial | 160 // constructor by examining command line flags and field trial |
| 142 // state. Note that new-scoring has to do with a new version of the | 161 // state. Note that new-scoring has to do with a new version of the |
| 143 // ordinary scoring done here. It has nothing to do with and no | 162 // ordinary scoring done here. It has nothing to do with and no |
| 144 // effect on HistoryURLProvider-like scoring that can happen in this | 163 // effect on HistoryURLProvider-like scoring that can happen in this |
| 145 // class as well (see boolean below). | 164 // class as well (see boolean below). |
| 146 static bool use_new_scoring; | 165 static bool use_new_scoring; |
| 147 | 166 |
| 167 // If true, we ignore all matches that are in the middle of a word. |
| 168 static bool only_count_matches_at_word_boundaries; |
| 169 |
| 148 // If true, assign raw scores to be max(whatever it normally would be, | 170 // If true, assign raw scores to be max(whatever it normally would be, |
| 149 // a score that's similar to the score HistoryURLProvider would assign). | 171 // a score that's similar to the score HistoryURLProvider would assign). |
| 150 // This variable is set in the constructor by examining the field trial | 172 // This variable is set in the constructor by examining the field trial |
| 151 // state. | 173 // state. |
| 152 static bool also_do_hup_like_scoring; | 174 static bool also_do_hup_like_scoring; |
| 153 }; | 175 }; |
| 154 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; | 176 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
| 155 | 177 |
| 156 } // namespace history | 178 } // namespace history |
| 157 | 179 |
| 158 #endif // CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ | 180 #endif // CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ |
| OLD | NEW |