OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/query_parser.h" | 5 #include "chrome/browser/history/query_parser.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
13 #include "base/stl_util.h" | 13 #include "base/stl_util.h" |
| 14 #include "base/strings/utf_string_conversions.h" |
14 | 15 |
15 namespace { | 16 namespace { |
16 | 17 |
17 // Returns true if |mp1.first| is less than |mp2.first|. This is used to | 18 // Returns true if |mp1.first| is less than |mp2.first|. This is used to |
18 // sort match positions. | 19 // sort match positions. |
19 int CompareMatchPosition(const Snippet::MatchPosition& mp1, | 20 int CompareMatchPosition(const Snippet::MatchPosition& mp1, |
20 const Snippet::MatchPosition& mp2) { | 21 const Snippet::MatchPosition& mp2) { |
21 return mp1.first < mp2.first; | 22 return mp1.first < mp2.first; |
22 } | 23 } |
23 | 24 |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
81 | 82 |
82 void set_literal(bool literal) { literal_ = literal; } | 83 void set_literal(bool literal) { literal_ = literal; } |
83 | 84 |
84 // QueryNode: | 85 // QueryNode: |
85 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 86 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
86 virtual bool IsWord() const OVERRIDE; | 87 virtual bool IsWord() const OVERRIDE; |
87 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; | 88 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; |
88 virtual bool HasMatchIn( | 89 virtual bool HasMatchIn( |
89 const std::vector<QueryWord>& words, | 90 const std::vector<QueryWord>& words, |
90 Snippet::MatchPositions* match_positions) const OVERRIDE; | 91 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 92 virtual bool HasMatchIn( |
| 93 const std::vector<QueryWord>& words) const OVERRIDE; |
91 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; | 94 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; |
92 | 95 |
93 private: | 96 private: |
94 string16 word_; | 97 string16 word_; |
95 bool literal_; | 98 bool literal_; |
96 | 99 |
97 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); | 100 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); |
98 }; | 101 }; |
99 | 102 |
100 QueryNodeWord::QueryNodeWord(const string16& word) | 103 QueryNodeWord::QueryNodeWord(const string16& word) |
(...skipping 30 matching lines...) Expand all Loading... |
131 size_t match_start = words[i].position; | 134 size_t match_start = words[i].position; |
132 match_positions->push_back( | 135 match_positions->push_back( |
133 Snippet::MatchPosition(match_start, | 136 Snippet::MatchPosition(match_start, |
134 match_start + static_cast<int>(word_.size()))); | 137 match_start + static_cast<int>(word_.size()))); |
135 matched = true; | 138 matched = true; |
136 } | 139 } |
137 } | 140 } |
138 return matched; | 141 return matched; |
139 } | 142 } |
140 | 143 |
| 144 bool QueryNodeWord::HasMatchIn(const std::vector<QueryWord>& words) const { |
| 145 for (size_t i = 0; i < words.size(); ++i) { |
| 146 if (Matches(words[i].word, false)) |
| 147 return true; |
| 148 } |
| 149 return false; |
| 150 } |
| 151 |
141 void QueryNodeWord::AppendWords(std::vector<string16>* words) const { | 152 void QueryNodeWord::AppendWords(std::vector<string16>* words) const { |
142 words->push_back(word_); | 153 words->push_back(word_); |
143 } | 154 } |
144 | 155 |
145 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. | 156 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. |
146 class QueryNodeList : public QueryNode { | 157 class QueryNodeList : public QueryNode { |
147 public: | 158 public: |
148 typedef std::vector<QueryNode*> QueryNodeVector; | 159 typedef std::vector<QueryNode*> QueryNodeVector; |
149 | 160 |
150 QueryNodeList(); | 161 QueryNodeList(); |
151 virtual ~QueryNodeList(); | 162 virtual ~QueryNodeList(); |
152 | 163 |
153 QueryNodeVector* children() { return &children_; } | 164 QueryNodeVector* children() { return &children_; } |
154 | 165 |
155 void AddChild(QueryNode* node); | 166 void AddChild(QueryNode* node); |
156 | 167 |
157 // Remove empty subnodes left over from other parsing. | 168 // Remove empty subnodes left over from other parsing. |
158 void RemoveEmptySubnodes(); | 169 void RemoveEmptySubnodes(); |
159 | 170 |
160 // QueryNode: | 171 // QueryNode: |
161 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 172 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
162 virtual bool IsWord() const OVERRIDE; | 173 virtual bool IsWord() const OVERRIDE; |
163 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; | 174 virtual bool Matches(const string16& word, bool exact) const OVERRIDE; |
164 virtual bool HasMatchIn( | 175 virtual bool HasMatchIn( |
165 const std::vector<QueryWord>& words, | 176 const std::vector<QueryWord>& words, |
166 Snippet::MatchPositions* match_positions) const OVERRIDE; | 177 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 178 virtual bool HasMatchIn( |
| 179 const std::vector<QueryWord>& words) const OVERRIDE; |
167 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; | 180 virtual void AppendWords(std::vector<string16>* words) const OVERRIDE; |
168 | 181 |
169 protected: | 182 protected: |
170 int AppendChildrenToString(string16* query) const; | 183 int AppendChildrenToString(string16* query) const; |
171 | 184 |
172 QueryNodeVector children_; | 185 QueryNodeVector children_; |
173 | 186 |
174 private: | 187 private: |
175 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); | 188 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); |
176 }; | 189 }; |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
212 NOTREACHED(); | 225 NOTREACHED(); |
213 return false; | 226 return false; |
214 } | 227 } |
215 | 228 |
216 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, | 229 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words, |
217 Snippet::MatchPositions* match_positions) const { | 230 Snippet::MatchPositions* match_positions) const { |
218 NOTREACHED(); | 231 NOTREACHED(); |
219 return false; | 232 return false; |
220 } | 233 } |
221 | 234 |
| 235 bool QueryNodeList::HasMatchIn(const std::vector<QueryWord>& words) const { |
| 236 NOTREACHED(); |
| 237 return false; |
| 238 } |
| 239 |
222 void QueryNodeList::AppendWords(std::vector<string16>* words) const { | 240 void QueryNodeList::AppendWords(std::vector<string16>* words) const { |
223 for (size_t i = 0; i < children_.size(); ++i) | 241 for (size_t i = 0; i < children_.size(); ++i) |
224 children_[i]->AppendWords(words); | 242 children_[i]->AppendWords(words); |
225 } | 243 } |
226 | 244 |
227 int QueryNodeList::AppendChildrenToString(string16* query) const { | 245 int QueryNodeList::AppendChildrenToString(string16* query) const { |
228 int num_words = 0; | 246 int num_words = 0; |
229 for (QueryNodeVector::const_iterator node = children_.begin(); | 247 for (QueryNodeVector::const_iterator node = children_.begin(); |
230 node != children_.end(); ++node) { | 248 node != children_.end(); ++node) { |
231 if (node != children_.begin()) | 249 if (node != children_.begin()) |
232 query->push_back(L' '); | 250 query->push_back(L' '); |
233 num_words += (*node)->AppendToSQLiteQuery(query); | 251 num_words += (*node)->AppendToSQLiteQuery(query); |
234 } | 252 } |
235 return num_words; | 253 return num_words; |
236 } | 254 } |
237 | 255 |
238 // A QueryNodePhrase is a phrase query ("quoted"). | 256 // A QueryNodePhrase is a phrase query ("quoted"). |
239 class QueryNodePhrase : public QueryNodeList { | 257 class QueryNodePhrase : public QueryNodeList { |
240 public: | 258 public: |
241 QueryNodePhrase(); | 259 QueryNodePhrase(); |
242 virtual ~QueryNodePhrase(); | 260 virtual ~QueryNodePhrase(); |
243 | 261 |
244 // QueryNodeList: | 262 // QueryNodeList: |
245 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; | 263 virtual int AppendToSQLiteQuery(string16* query) const OVERRIDE; |
246 virtual bool HasMatchIn( | 264 virtual bool HasMatchIn( |
247 const std::vector<QueryWord>& words, | 265 const std::vector<QueryWord>& words, |
248 Snippet::MatchPositions* match_positions) const OVERRIDE; | 266 Snippet::MatchPositions* match_positions) const OVERRIDE; |
| 267 virtual bool HasMatchIn( |
| 268 const std::vector<QueryWord>& words) const OVERRIDE; |
249 | 269 |
250 private: | 270 private: |
| 271 bool MatchesAll(const std::vector<QueryWord>& words, |
| 272 const QueryWord** first_word, |
| 273 const QueryWord** last_word) const; |
251 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); | 274 DISALLOW_COPY_AND_ASSIGN(QueryNodePhrase); |
252 }; | 275 }; |
253 | 276 |
254 QueryNodePhrase::QueryNodePhrase() {} | 277 QueryNodePhrase::QueryNodePhrase() {} |
255 | 278 |
256 QueryNodePhrase::~QueryNodePhrase() {} | 279 QueryNodePhrase::~QueryNodePhrase() {} |
257 | 280 |
258 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { | 281 int QueryNodePhrase::AppendToSQLiteQuery(string16* query) const { |
259 query->push_back(L'"'); | 282 query->push_back(L'"'); |
260 int num_words = AppendChildrenToString(query); | 283 int num_words = AppendChildrenToString(query); |
261 query->push_back(L'"'); | 284 query->push_back(L'"'); |
262 return num_words; | 285 return num_words; |
263 } | 286 } |
264 | 287 |
265 bool QueryNodePhrase::HasMatchIn( | 288 bool QueryNodePhrase::MatchesAll(const std::vector<QueryWord>& words, |
266 const std::vector<QueryWord>& words, | 289 const QueryWord** first_word, |
267 Snippet::MatchPositions* match_positions) const { | 290 const QueryWord** last_word) const { |
268 if (words.size() < children_.size()) | 291 if (words.size() < children_.size()) |
269 return false; | 292 return false; |
270 | 293 |
271 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { | 294 for (size_t i = 0, max = words.size() - children_.size() + 1; i < max; ++i) { |
272 bool matched_all = true; | 295 bool matched_all = true; |
273 for (size_t j = 0; j < children_.size(); ++j) { | 296 for (size_t j = 0; j < children_.size(); ++j) { |
274 if (!children_[j]->Matches(words[i + j].word, true)) { | 297 if (!children_[j]->Matches(words[i + j].word, true)) { |
275 matched_all = false; | 298 matched_all = false; |
276 break; | 299 break; |
277 } | 300 } |
278 } | 301 } |
279 if (matched_all) { | 302 if (matched_all) { |
280 const QueryWord& last_word = words[i + children_.size() - 1]; | 303 *first_word = &words[i]; |
281 match_positions->push_back( | 304 *last_word = &words[i + children_.size() - 1]; |
282 Snippet::MatchPosition(words[i].position, | |
283 last_word.position + last_word.word.length())); | |
284 return true; | 305 return true; |
285 } | 306 } |
286 } | 307 } |
287 return false; | 308 return false; |
288 } | 309 } |
289 | 310 |
| 311 bool QueryNodePhrase::HasMatchIn( |
| 312 const std::vector<QueryWord>& words, |
| 313 Snippet::MatchPositions* match_positions) const { |
| 314 const QueryWord* first_word; |
| 315 const QueryWord* last_word; |
| 316 |
| 317 if (MatchesAll(words, &first_word, &last_word)) { |
| 318 match_positions->push_back( |
| 319 Snippet::MatchPosition(first_word->position, |
| 320 last_word->position + last_word->word.length())); |
| 321 return true; |
| 322 } |
| 323 return false; |
| 324 } |
| 325 |
| 326 bool QueryNodePhrase::HasMatchIn(const std::vector<QueryWord>& words) const { |
| 327 const QueryWord* first_word; |
| 328 const QueryWord* last_word; |
| 329 return MatchesAll(words, &first_word, &last_word); |
| 330 } |
| 331 |
290 QueryParser::QueryParser() {} | 332 QueryParser::QueryParser() {} |
291 | 333 |
292 // static | 334 // static |
293 bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) { | 335 bool QueryParser::IsWordLongEnoughForPrefixSearch(const string16& word) { |
294 DCHECK(!word.empty()); | 336 DCHECK(!word.empty()); |
295 size_t minimum_length = 3; | 337 size_t minimum_length = 3; |
296 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) | 338 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) |
297 // because they 'behave like' Latin letters. Moreover, we should | 339 // because they 'behave like' Latin letters. Moreover, we should |
298 // normalize the former before reaching here. | 340 // normalize the former before reaching here. |
299 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) | 341 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
347 // TODO(sky): we need a better way to align the positions so that we don't | 389 // TODO(sky): we need a better way to align the positions so that we don't |
348 // completely punt here. | 390 // completely punt here. |
349 match_positions->clear(); | 391 match_positions->clear(); |
350 } else { | 392 } else { |
351 CoalseAndSortMatchPositions(&matches); | 393 CoalseAndSortMatchPositions(&matches); |
352 match_positions->swap(matches); | 394 match_positions->swap(matches); |
353 } | 395 } |
354 return true; | 396 return true; |
355 } | 397 } |
356 | 398 |
| 399 bool QueryParser::DoesQueryMatch(const std::vector<QueryWord>& query_words, |
| 400 const std::vector<QueryNode*>& query_nodes) { |
| 401 if (query_nodes.empty() || query_words.empty()) |
| 402 return false; |
| 403 |
| 404 for (size_t i = 0; i < query_nodes.size(); ++i) { |
| 405 if (!query_nodes[i]->HasMatchIn(query_words)) |
| 406 return false; |
| 407 } |
| 408 return true; |
| 409 } |
| 410 |
357 bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) { | 411 bool QueryParser::ParseQueryImpl(const string16& query, QueryNodeList* root) { |
358 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); | 412 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); |
359 // TODO(evanm): support a locale here | 413 // TODO(evanm): support a locale here |
360 if (!iter.Init()) | 414 if (!iter.Init()) |
361 return false; | 415 return false; |
362 | 416 |
363 // To handle nesting, we maintain a stack of QueryNodeLists. | 417 // To handle nesting, we maintain a stack of QueryNodeLists. |
364 // The last element (back) of the stack contains the current, deepest node. | 418 // The last element (back) of the stack contains the current, deepest node. |
365 std::vector<QueryNodeList*> query_stack; | 419 std::vector<QueryNodeList*> query_stack; |
366 query_stack.push_back(root); | 420 query_stack.push_back(root); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
408 if (iter.IsWord()) { | 462 if (iter.IsWord()) { |
409 string16 word = iter.GetString(); | 463 string16 word = iter.GetString(); |
410 if (!word.empty()) { | 464 if (!word.empty()) { |
411 words->push_back(QueryWord()); | 465 words->push_back(QueryWord()); |
412 words->back().word = word; | 466 words->back().word = word; |
413 words->back().position = iter.prev(); | 467 words->back().position = iter.prev(); |
414 } | 468 } |
415 } | 469 } |
416 } | 470 } |
417 } | 471 } |
OLD | NEW |