chrome/browser/ui/app_list/search/tokenized_string_match.cc - Issue 522683003: Move basic string operations to ui/app_list/search

Side by Side Diff: chrome/browser/ui/app_list/search/tokenized_string_match.cc

Issue 522683003: Move basic string operations to ui/app_list/search (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: gn Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « chrome/browser/ui/app_list/search/tokenized_string_match.h ('k') | chrome/browser/ui/app_list/search/tokenized_string_match_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "chrome/browser/ui/app_list/search/tokenized_string_match.h"

6

7 #include "base/i18n/string_search.h"

8 #include "base/logging.h"

9 #include "chrome/browser/ui/app_list/search/tokenized_string_char_iterator.h"

10

11 namespace app_list {

12

13 namespace {

14

15 // The factors below are applied when the current char of query matches

16 // the current char of the text to be matched. Different factors are chosen

17 // based on where the match happens. kIsPrefixMultiplier is used when the

18 // matched portion is a prefix of both the query and the text, which implies

19 // that the matched chars are at the same position in query and text. This is

20 // the most preferred case thus it has the highest score. When the current char

21 // of the query and the text does not match, the algorithm moves to the next

22 // token in the text and try to match from there. kIsFrontOfWordMultipler will

23 // be used if the first char of the token matches the current char of the query.

24 // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is

25 // used.

26 // Examples:

27 // Suppose the text to be matched is 'Google Chrome'.

28 // Query 'go' would yield kIsPrefixMultiplier for each char.

29 // Query 'gc' would use kIsPrefixMultiplier for 'g' and

30 // kIsFrontOfWordMultipler for 'c'.

31 // Query 'ch' would use kIsFrontOfWordMultipler for 'c' and

32 // kIsWeakHitMultiplier for 'h'.

33 // Query 'oo' does not match any prefix and would use the substring match

34 // fallback, thus kIsSubstringMultiplier is used for each char.

35 const double kIsPrefixMultiplier = 1.0;

36 const double kIsFrontOfWordMultipler = 0.8;

37 const double kIsWeakHitMultiplier = 0.6;

38 const double kIsSubstringMultiplier = 0.4;

39

40 // A relevance score that represents no match.

41 const double kNoMatchScore = 0.0;

42

43 // PrefixMatcher matches the chars of a given query as prefix of tokens in

44 // a given text or as prefix of the acronyms of those text tokens.

45 class PrefixMatcher {

46 public:

47 PrefixMatcher(const TokenizedString& query,

48 const TokenizedString& text)

49 : query_iter_(query),

50 text_iter_(text),

51 current_match_(gfx::Range::InvalidRange()),

52 current_relevance_(kNoMatchScore) {

53 }

54

55 // Invokes RunMatch to perform prefix match. Use \|states_\| as a stack to

56 // perform DFS (depth first search) so that all possible matches are

57 // attempted. Stops on the first full match and returns true. Otherwise,

58 // returns false to indicate no match.

59 bool Match() {

60 while (!RunMatch()) {

61 // No match found and no more states to try. Bail out.

62 if (states_.empty()) {

63 current_relevance_ = kNoMatchScore;

64 current_hits_.clear();

65 return false;

66 }

67

68 PopState();

69

70 // Skip restored match to try other possibilites.

71 AdvanceToNextTextToken();

72 }

73

74 if (current_match_.IsValid())

75 current_hits_.push_back(current_match_);

76

77 return true;

78 }

79

80 double relevance() const { return current_relevance_; }

81 const TokenizedStringMatch::Hits& hits() const { return current_hits_; }

82

83 private:

84 // Context record of a match.

85 struct State {

86 State() : relevance(kNoMatchScore) {}

87 State(double relevance,

88 const gfx::Range& current_match,

89 const TokenizedStringMatch::Hits& hits,

90 const TokenizedStringCharIterator& query_iter,

91 const TokenizedStringCharIterator& text_iter)

92 : relevance(relevance),

93 current_match(current_match),

94 hits(hits.begin(), hits.end()),

95 query_iter_state(query_iter.GetState()),

96 text_iter_state(text_iter.GetState()) {}

97

98 // The current score of the processed query chars.

99 double relevance;

100

101 // Current matching range.

102 gfx::Range current_match;

103

104 // Completed matching ranges of the processed query chars.

105 TokenizedStringMatch::Hits hits;

106

107 // States of the processed query and text chars.

108 TokenizedStringCharIterator::State query_iter_state;

109 TokenizedStringCharIterator::State text_iter_state;

110 };

111 typedef std::vector<State> States;

112

113 // Match chars from the query and text one by one. For each matching char,

114 // calculate relevance and matching ranges. And the current stats is

115 // recorded so that the match could be skipped later to try other

116 // possiblities. Repeat until any of the iterators run out. Return true if

117 // query iterator runs out, i.e. all chars in query are matched.

118 bool RunMatch() {

119 while (!query_iter_.end() && !text_iter_.end()) {

120 if (query_iter_.Get() == text_iter_.Get()) {

121 PushState();

122

123 if (query_iter_.GetArrayPos() == text_iter_.GetArrayPos())

124 current_relevance_ += kIsPrefixMultiplier;

125 else if (text_iter_.IsFirstCharOfToken())

126 current_relevance_ += kIsFrontOfWordMultipler;

127 else

128 current_relevance_ += kIsWeakHitMultiplier;

129

130 if (!current_match_.IsValid())

131 current_match_.set_start(text_iter_.GetArrayPos());

132 current_match_.set_end(text_iter_.GetArrayPos() +

133 text_iter_.GetCharSize());

134

135 query_iter_.NextChar();

136 text_iter_.NextChar();

137 } else {

138 AdvanceToNextTextToken();

139 }

140 }

141

142 return query_iter_.end();

143 }

144

145 // Skip to the next text token and close current match. Invoked when a

146 // mismatch happens or to skip a restored match.

147 void AdvanceToNextTextToken() {

148 if (current_match_.IsValid()) {

149 current_hits_.push_back(current_match_);

150 current_match_ = gfx::Range::InvalidRange();

151 }

152

153 text_iter_.NextToken();

154 }

155

156 void PushState() {

157 states_.push_back(State(current_relevance_,

158 current_match_,

159 current_hits_,

160 query_iter_,

161 text_iter_));

162 }

163

164 void PopState() {

165 DCHECK(!states_.empty());

166

167 State& last_match = states_.back();

168 current_relevance_ = last_match.relevance;

169 current_match_ = last_match.current_match;

170 current_hits_.swap(last_match.hits);

171 query_iter_.SetState(last_match.query_iter_state);

172 text_iter_.SetState(last_match.text_iter_state);

173

174 states_.pop_back();

175 }

176

177 TokenizedStringCharIterator query_iter_;

178 TokenizedStringCharIterator text_iter_;

179

180 States states_;

181 gfx::Range current_match_;

182

183 double current_relevance_;

184 TokenizedStringMatch::Hits current_hits_;

185

186 DISALLOW_COPY_AND_ASSIGN(PrefixMatcher);

187 };

188

189 } // namespace

190

191 TokenizedStringMatch::TokenizedStringMatch()

192 : relevance_(kNoMatchScore) {}

193

194 TokenizedStringMatch::~TokenizedStringMatch() {}

195

196 bool TokenizedStringMatch::Calculate(const TokenizedString& query,

197 const TokenizedString& text) {

198 relevance_ = kNoMatchScore;

199 hits_.clear();

200

201 PrefixMatcher matcher(query, text);

202 if (matcher.Match()) {

203 relevance_ = matcher.relevance();

204 hits_.assign(matcher.hits().begin(), matcher.hits().end());

205 }

206

207 // Substring match as a fallback.

208 if (relevance_ == kNoMatchScore) {

209 size_t substr_match_start = 0;

210 size_t substr_match_length = 0;

211 if (base::i18n::StringSearchIgnoringCaseAndAccents(query.text(),

212 text.text(),

213 &substr_match_start,

214 &substr_match_length)) {

215 relevance_ = kIsSubstringMultiplier * substr_match_length;

216 hits_.push_back(gfx::Range(substr_match_start,

217 substr_match_start + substr_match_length));

218 }

219 }

220

221 // Using length() for normalizing is not 100% correct but should be good

222 // enough compared with using real char count of the text.

223 if (text.text().length())

224 relevance_ /= text.text().length();

225

226 return relevance_ > kNoMatchScore;

227 }

228

229 bool TokenizedStringMatch::Calculate(const base::string16& query,

230 const base::string16& text) {

231 const TokenizedString tokenized_query(query);

232 const TokenizedString tokenized_text(text);

233 return Calculate(tokenized_query, tokenized_text);

234 }

235

236 } // namespace app_list

OLD	NEW