| OLD | NEW |
| (Empty) |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/ui/app_list/search/tokenized_string_match.h" | |
| 6 | |
| 7 #include "base/i18n/string_search.h" | |
| 8 #include "base/logging.h" | |
| 9 #include "chrome/browser/ui/app_list/search/tokenized_string_char_iterator.h" | |
| 10 | |
| 11 namespace app_list { | |
| 12 | |
| 13 namespace { | |
| 14 | |
| 15 // The factors below are applied when the current char of query matches | |
| 16 // the current char of the text to be matched. Different factors are chosen | |
| 17 // based on where the match happens. kIsPrefixMultiplier is used when the | |
| 18 // matched portion is a prefix of both the query and the text, which implies | |
| 19 // that the matched chars are at the same position in query and text. This is | |
| 20 // the most preferred case thus it has the highest score. When the current char | |
| 21 // of the query and the text does not match, the algorithm moves to the next | |
| 22 // token in the text and try to match from there. kIsFrontOfWordMultipler will | |
| 23 // be used if the first char of the token matches the current char of the query. | |
| 24 // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is | |
| 25 // used. | |
| 26 // Examples: | |
| 27 // Suppose the text to be matched is 'Google Chrome'. | |
| 28 // Query 'go' would yield kIsPrefixMultiplier for each char. | |
| 29 // Query 'gc' would use kIsPrefixMultiplier for 'g' and | |
| 30 // kIsFrontOfWordMultipler for 'c'. | |
| 31 // Query 'ch' would use kIsFrontOfWordMultipler for 'c' and | |
| 32 // kIsWeakHitMultiplier for 'h'. | |
| 33 // Query 'oo' does not match any prefix and would use the substring match | |
| 34 // fallback, thus kIsSubstringMultiplier is used for each char. | |
| 35 const double kIsPrefixMultiplier = 1.0; | |
| 36 const double kIsFrontOfWordMultipler = 0.8; | |
| 37 const double kIsWeakHitMultiplier = 0.6; | |
| 38 const double kIsSubstringMultiplier = 0.4; | |
| 39 | |
| 40 // A relevance score that represents no match. | |
| 41 const double kNoMatchScore = 0.0; | |
| 42 | |
| 43 // PrefixMatcher matches the chars of a given query as prefix of tokens in | |
| 44 // a given text or as prefix of the acronyms of those text tokens. | |
| 45 class PrefixMatcher { | |
| 46 public: | |
| 47 PrefixMatcher(const TokenizedString& query, | |
| 48 const TokenizedString& text) | |
| 49 : query_iter_(query), | |
| 50 text_iter_(text), | |
| 51 current_match_(gfx::Range::InvalidRange()), | |
| 52 current_relevance_(kNoMatchScore) { | |
| 53 } | |
| 54 | |
| 55 // Invokes RunMatch to perform prefix match. Use |states_| as a stack to | |
| 56 // perform DFS (depth first search) so that all possible matches are | |
| 57 // attempted. Stops on the first full match and returns true. Otherwise, | |
| 58 // returns false to indicate no match. | |
| 59 bool Match() { | |
| 60 while (!RunMatch()) { | |
| 61 // No match found and no more states to try. Bail out. | |
| 62 if (states_.empty()) { | |
| 63 current_relevance_ = kNoMatchScore; | |
| 64 current_hits_.clear(); | |
| 65 return false; | |
| 66 } | |
| 67 | |
| 68 PopState(); | |
| 69 | |
| 70 // Skip restored match to try other possibilites. | |
| 71 AdvanceToNextTextToken(); | |
| 72 } | |
| 73 | |
| 74 if (current_match_.IsValid()) | |
| 75 current_hits_.push_back(current_match_); | |
| 76 | |
| 77 return true; | |
| 78 } | |
| 79 | |
| 80 double relevance() const { return current_relevance_; } | |
| 81 const TokenizedStringMatch::Hits& hits() const { return current_hits_; } | |
| 82 | |
| 83 private: | |
| 84 // Context record of a match. | |
| 85 struct State { | |
| 86 State() : relevance(kNoMatchScore) {} | |
| 87 State(double relevance, | |
| 88 const gfx::Range& current_match, | |
| 89 const TokenizedStringMatch::Hits& hits, | |
| 90 const TokenizedStringCharIterator& query_iter, | |
| 91 const TokenizedStringCharIterator& text_iter) | |
| 92 : relevance(relevance), | |
| 93 current_match(current_match), | |
| 94 hits(hits.begin(), hits.end()), | |
| 95 query_iter_state(query_iter.GetState()), | |
| 96 text_iter_state(text_iter.GetState()) {} | |
| 97 | |
| 98 // The current score of the processed query chars. | |
| 99 double relevance; | |
| 100 | |
| 101 // Current matching range. | |
| 102 gfx::Range current_match; | |
| 103 | |
| 104 // Completed matching ranges of the processed query chars. | |
| 105 TokenizedStringMatch::Hits hits; | |
| 106 | |
| 107 // States of the processed query and text chars. | |
| 108 TokenizedStringCharIterator::State query_iter_state; | |
| 109 TokenizedStringCharIterator::State text_iter_state; | |
| 110 }; | |
| 111 typedef std::vector<State> States; | |
| 112 | |
| 113 // Match chars from the query and text one by one. For each matching char, | |
| 114 // calculate relevance and matching ranges. And the current stats is | |
| 115 // recorded so that the match could be skipped later to try other | |
| 116 // possiblities. Repeat until any of the iterators run out. Return true if | |
| 117 // query iterator runs out, i.e. all chars in query are matched. | |
| 118 bool RunMatch() { | |
| 119 while (!query_iter_.end() && !text_iter_.end()) { | |
| 120 if (query_iter_.Get() == text_iter_.Get()) { | |
| 121 PushState(); | |
| 122 | |
| 123 if (query_iter_.GetArrayPos() == text_iter_.GetArrayPos()) | |
| 124 current_relevance_ += kIsPrefixMultiplier; | |
| 125 else if (text_iter_.IsFirstCharOfToken()) | |
| 126 current_relevance_ += kIsFrontOfWordMultipler; | |
| 127 else | |
| 128 current_relevance_ += kIsWeakHitMultiplier; | |
| 129 | |
| 130 if (!current_match_.IsValid()) | |
| 131 current_match_.set_start(text_iter_.GetArrayPos()); | |
| 132 current_match_.set_end(text_iter_.GetArrayPos() + | |
| 133 text_iter_.GetCharSize()); | |
| 134 | |
| 135 query_iter_.NextChar(); | |
| 136 text_iter_.NextChar(); | |
| 137 } else { | |
| 138 AdvanceToNextTextToken(); | |
| 139 } | |
| 140 } | |
| 141 | |
| 142 return query_iter_.end(); | |
| 143 } | |
| 144 | |
| 145 // Skip to the next text token and close current match. Invoked when a | |
| 146 // mismatch happens or to skip a restored match. | |
| 147 void AdvanceToNextTextToken() { | |
| 148 if (current_match_.IsValid()) { | |
| 149 current_hits_.push_back(current_match_); | |
| 150 current_match_ = gfx::Range::InvalidRange(); | |
| 151 } | |
| 152 | |
| 153 text_iter_.NextToken(); | |
| 154 } | |
| 155 | |
| 156 void PushState() { | |
| 157 states_.push_back(State(current_relevance_, | |
| 158 current_match_, | |
| 159 current_hits_, | |
| 160 query_iter_, | |
| 161 text_iter_)); | |
| 162 } | |
| 163 | |
| 164 void PopState() { | |
| 165 DCHECK(!states_.empty()); | |
| 166 | |
| 167 State& last_match = states_.back(); | |
| 168 current_relevance_ = last_match.relevance; | |
| 169 current_match_ = last_match.current_match; | |
| 170 current_hits_.swap(last_match.hits); | |
| 171 query_iter_.SetState(last_match.query_iter_state); | |
| 172 text_iter_.SetState(last_match.text_iter_state); | |
| 173 | |
| 174 states_.pop_back(); | |
| 175 } | |
| 176 | |
| 177 TokenizedStringCharIterator query_iter_; | |
| 178 TokenizedStringCharIterator text_iter_; | |
| 179 | |
| 180 States states_; | |
| 181 gfx::Range current_match_; | |
| 182 | |
| 183 double current_relevance_; | |
| 184 TokenizedStringMatch::Hits current_hits_; | |
| 185 | |
| 186 DISALLOW_COPY_AND_ASSIGN(PrefixMatcher); | |
| 187 }; | |
| 188 | |
| 189 } // namespace | |
| 190 | |
| 191 TokenizedStringMatch::TokenizedStringMatch() | |
| 192 : relevance_(kNoMatchScore) {} | |
| 193 | |
| 194 TokenizedStringMatch::~TokenizedStringMatch() {} | |
| 195 | |
| 196 bool TokenizedStringMatch::Calculate(const TokenizedString& query, | |
| 197 const TokenizedString& text) { | |
| 198 relevance_ = kNoMatchScore; | |
| 199 hits_.clear(); | |
| 200 | |
| 201 PrefixMatcher matcher(query, text); | |
| 202 if (matcher.Match()) { | |
| 203 relevance_ = matcher.relevance(); | |
| 204 hits_.assign(matcher.hits().begin(), matcher.hits().end()); | |
| 205 } | |
| 206 | |
| 207 // Substring match as a fallback. | |
| 208 if (relevance_ == kNoMatchScore) { | |
| 209 size_t substr_match_start = 0; | |
| 210 size_t substr_match_length = 0; | |
| 211 if (base::i18n::StringSearchIgnoringCaseAndAccents(query.text(), | |
| 212 text.text(), | |
| 213 &substr_match_start, | |
| 214 &substr_match_length)) { | |
| 215 relevance_ = kIsSubstringMultiplier * substr_match_length; | |
| 216 hits_.push_back(gfx::Range(substr_match_start, | |
| 217 substr_match_start + substr_match_length)); | |
| 218 } | |
| 219 } | |
| 220 | |
| 221 // Using length() for normalizing is not 100% correct but should be good | |
| 222 // enough compared with using real char count of the text. | |
| 223 if (text.text().length()) | |
| 224 relevance_ /= text.text().length(); | |
| 225 | |
| 226 return relevance_ > kNoMatchScore; | |
| 227 } | |
| 228 | |
| 229 bool TokenizedStringMatch::Calculate(const base::string16& query, | |
| 230 const base::string16& text) { | |
| 231 const TokenizedString tokenized_query(query); | |
| 232 const TokenizedString tokenized_text(text); | |
| 233 return Calculate(tokenized_query, tokenized_text); | |
| 234 } | |
| 235 | |
| 236 } // namespace app_list | |
| OLD | NEW |