OLD | NEW |
| (Empty) |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/ui/app_list/search/tokenized_string_match.h" | |
6 | |
7 #include "base/i18n/string_search.h" | |
8 #include "base/logging.h" | |
9 #include "chrome/browser/ui/app_list/search/tokenized_string_char_iterator.h" | |
10 | |
11 namespace app_list { | |
12 | |
13 namespace { | |
14 | |
15 // The factors below are applied when the current char of query matches | |
16 // the current char of the text to be matched. Different factors are chosen | |
17 // based on where the match happens. kIsPrefixMultiplier is used when the | |
18 // matched portion is a prefix of both the query and the text, which implies | |
19 // that the matched chars are at the same position in query and text. This is | |
20 // the most preferred case thus it has the highest score. When the current char | |
21 // of the query and the text does not match, the algorithm moves to the next | |
22 // token in the text and try to match from there. kIsFrontOfWordMultipler will | |
23 // be used if the first char of the token matches the current char of the query. | |
24 // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is | |
25 // used. | |
26 // Examples: | |
27 // Suppose the text to be matched is 'Google Chrome'. | |
28 // Query 'go' would yield kIsPrefixMultiplier for each char. | |
29 // Query 'gc' would use kIsPrefixMultiplier for 'g' and | |
30 // kIsFrontOfWordMultipler for 'c'. | |
31 // Query 'ch' would use kIsFrontOfWordMultipler for 'c' and | |
32 // kIsWeakHitMultiplier for 'h'. | |
33 // Query 'oo' does not match any prefix and would use the substring match | |
34 // fallback, thus kIsSubstringMultiplier is used for each char. | |
35 const double kIsPrefixMultiplier = 1.0; | |
36 const double kIsFrontOfWordMultipler = 0.8; | |
37 const double kIsWeakHitMultiplier = 0.6; | |
38 const double kIsSubstringMultiplier = 0.4; | |
39 | |
40 // A relevance score that represents no match. | |
41 const double kNoMatchScore = 0.0; | |
42 | |
43 // PrefixMatcher matches the chars of a given query as prefix of tokens in | |
44 // a given text or as prefix of the acronyms of those text tokens. | |
45 class PrefixMatcher { | |
46 public: | |
47 PrefixMatcher(const TokenizedString& query, | |
48 const TokenizedString& text) | |
49 : query_iter_(query), | |
50 text_iter_(text), | |
51 current_match_(gfx::Range::InvalidRange()), | |
52 current_relevance_(kNoMatchScore) { | |
53 } | |
54 | |
55 // Invokes RunMatch to perform prefix match. Use |states_| as a stack to | |
56 // perform DFS (depth first search) so that all possible matches are | |
57 // attempted. Stops on the first full match and returns true. Otherwise, | |
58 // returns false to indicate no match. | |
59 bool Match() { | |
60 while (!RunMatch()) { | |
61 // No match found and no more states to try. Bail out. | |
62 if (states_.empty()) { | |
63 current_relevance_ = kNoMatchScore; | |
64 current_hits_.clear(); | |
65 return false; | |
66 } | |
67 | |
68 PopState(); | |
69 | |
70 // Skip restored match to try other possibilites. | |
71 AdvanceToNextTextToken(); | |
72 } | |
73 | |
74 if (current_match_.IsValid()) | |
75 current_hits_.push_back(current_match_); | |
76 | |
77 return true; | |
78 } | |
79 | |
80 double relevance() const { return current_relevance_; } | |
81 const TokenizedStringMatch::Hits& hits() const { return current_hits_; } | |
82 | |
83 private: | |
84 // Context record of a match. | |
85 struct State { | |
86 State() : relevance(kNoMatchScore) {} | |
87 State(double relevance, | |
88 const gfx::Range& current_match, | |
89 const TokenizedStringMatch::Hits& hits, | |
90 const TokenizedStringCharIterator& query_iter, | |
91 const TokenizedStringCharIterator& text_iter) | |
92 : relevance(relevance), | |
93 current_match(current_match), | |
94 hits(hits.begin(), hits.end()), | |
95 query_iter_state(query_iter.GetState()), | |
96 text_iter_state(text_iter.GetState()) {} | |
97 | |
98 // The current score of the processed query chars. | |
99 double relevance; | |
100 | |
101 // Current matching range. | |
102 gfx::Range current_match; | |
103 | |
104 // Completed matching ranges of the processed query chars. | |
105 TokenizedStringMatch::Hits hits; | |
106 | |
107 // States of the processed query and text chars. | |
108 TokenizedStringCharIterator::State query_iter_state; | |
109 TokenizedStringCharIterator::State text_iter_state; | |
110 }; | |
111 typedef std::vector<State> States; | |
112 | |
113 // Match chars from the query and text one by one. For each matching char, | |
114 // calculate relevance and matching ranges. And the current stats is | |
115 // recorded so that the match could be skipped later to try other | |
116 // possiblities. Repeat until any of the iterators run out. Return true if | |
117 // query iterator runs out, i.e. all chars in query are matched. | |
118 bool RunMatch() { | |
119 while (!query_iter_.end() && !text_iter_.end()) { | |
120 if (query_iter_.Get() == text_iter_.Get()) { | |
121 PushState(); | |
122 | |
123 if (query_iter_.GetArrayPos() == text_iter_.GetArrayPos()) | |
124 current_relevance_ += kIsPrefixMultiplier; | |
125 else if (text_iter_.IsFirstCharOfToken()) | |
126 current_relevance_ += kIsFrontOfWordMultipler; | |
127 else | |
128 current_relevance_ += kIsWeakHitMultiplier; | |
129 | |
130 if (!current_match_.IsValid()) | |
131 current_match_.set_start(text_iter_.GetArrayPos()); | |
132 current_match_.set_end(text_iter_.GetArrayPos() + | |
133 text_iter_.GetCharSize()); | |
134 | |
135 query_iter_.NextChar(); | |
136 text_iter_.NextChar(); | |
137 } else { | |
138 AdvanceToNextTextToken(); | |
139 } | |
140 } | |
141 | |
142 return query_iter_.end(); | |
143 } | |
144 | |
145 // Skip to the next text token and close current match. Invoked when a | |
146 // mismatch happens or to skip a restored match. | |
147 void AdvanceToNextTextToken() { | |
148 if (current_match_.IsValid()) { | |
149 current_hits_.push_back(current_match_); | |
150 current_match_ = gfx::Range::InvalidRange(); | |
151 } | |
152 | |
153 text_iter_.NextToken(); | |
154 } | |
155 | |
156 void PushState() { | |
157 states_.push_back(State(current_relevance_, | |
158 current_match_, | |
159 current_hits_, | |
160 query_iter_, | |
161 text_iter_)); | |
162 } | |
163 | |
164 void PopState() { | |
165 DCHECK(!states_.empty()); | |
166 | |
167 State& last_match = states_.back(); | |
168 current_relevance_ = last_match.relevance; | |
169 current_match_ = last_match.current_match; | |
170 current_hits_.swap(last_match.hits); | |
171 query_iter_.SetState(last_match.query_iter_state); | |
172 text_iter_.SetState(last_match.text_iter_state); | |
173 | |
174 states_.pop_back(); | |
175 } | |
176 | |
177 TokenizedStringCharIterator query_iter_; | |
178 TokenizedStringCharIterator text_iter_; | |
179 | |
180 States states_; | |
181 gfx::Range current_match_; | |
182 | |
183 double current_relevance_; | |
184 TokenizedStringMatch::Hits current_hits_; | |
185 | |
186 DISALLOW_COPY_AND_ASSIGN(PrefixMatcher); | |
187 }; | |
188 | |
189 } // namespace | |
190 | |
191 TokenizedStringMatch::TokenizedStringMatch() | |
192 : relevance_(kNoMatchScore) {} | |
193 | |
194 TokenizedStringMatch::~TokenizedStringMatch() {} | |
195 | |
196 bool TokenizedStringMatch::Calculate(const TokenizedString& query, | |
197 const TokenizedString& text) { | |
198 relevance_ = kNoMatchScore; | |
199 hits_.clear(); | |
200 | |
201 PrefixMatcher matcher(query, text); | |
202 if (matcher.Match()) { | |
203 relevance_ = matcher.relevance(); | |
204 hits_.assign(matcher.hits().begin(), matcher.hits().end()); | |
205 } | |
206 | |
207 // Substring match as a fallback. | |
208 if (relevance_ == kNoMatchScore) { | |
209 size_t substr_match_start = 0; | |
210 size_t substr_match_length = 0; | |
211 if (base::i18n::StringSearchIgnoringCaseAndAccents(query.text(), | |
212 text.text(), | |
213 &substr_match_start, | |
214 &substr_match_length)) { | |
215 relevance_ = kIsSubstringMultiplier * substr_match_length; | |
216 hits_.push_back(gfx::Range(substr_match_start, | |
217 substr_match_start + substr_match_length)); | |
218 } | |
219 } | |
220 | |
221 // Using length() for normalizing is not 100% correct but should be good | |
222 // enough compared with using real char count of the text. | |
223 if (text.text().length()) | |
224 relevance_ /= text.text().length(); | |
225 | |
226 return relevance_ > kNoMatchScore; | |
227 } | |
228 | |
229 bool TokenizedStringMatch::Calculate(const base::string16& query, | |
230 const base::string16& text) { | |
231 const TokenizedString tokenized_query(query); | |
232 const TokenizedString tokenized_text(text); | |
233 return Calculate(tokenized_query, tokenized_text); | |
234 } | |
235 | |
236 } // namespace app_list | |
OLD | NEW |