OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/query_parser/query_parser.h" | 5 #include "components/query_parser/query_parser.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
64 // or a single word (a QueryNodeWord). | 64 // or a single word (a QueryNodeWord). |
65 | 65 |
66 // A QueryNodeWord is a single word in the query. | 66 // A QueryNodeWord is a single word in the query. |
67 class QueryNodeWord : public QueryNode { | 67 class QueryNodeWord : public QueryNode { |
68 public: | 68 public: |
69 explicit QueryNodeWord(const base::string16& word); | 69 explicit QueryNodeWord(const base::string16& word); |
70 ~QueryNodeWord() override; | 70 ~QueryNodeWord() override; |
71 | 71 |
72 const base::string16& word() const { return word_; } | 72 const base::string16& word() const { return word_; } |
73 | 73 |
74 bool literal() const { return literal_; }; | |
74 void set_literal(bool literal) { literal_ = literal; } | 75 void set_literal(bool literal) { literal_ = literal; } |
75 | 76 |
76 // QueryNode: | 77 // QueryNode: |
77 int AppendToSQLiteQuery(base::string16* query) const override; | 78 int AppendToSQLiteQuery(base::string16* query) const override; |
78 bool IsWord() const override; | 79 bool IsWord() const override; |
79 bool Matches(const base::string16& word, bool exact) const override; | 80 bool Matches(const base::string16& word, bool exact) const override; |
80 bool HasMatchIn(const QueryWordVector& words, | 81 bool HasMatchIn(const QueryWordVector& words, |
81 Snippet::MatchPositions* match_positions) const override; | 82 Snippet::MatchPositions* match_positions) const override; |
82 bool HasMatchIn(const QueryWordVector& words) const override; | 83 bool HasMatchIn(const QueryWordVector& words) const override; |
83 void AppendWords(std::vector<base::string16>* words) const override; | 84 void AppendWords(std::vector<base::string16>* words) const override; |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
135 if (Matches(words[i].word, false)) | 136 if (Matches(words[i].word, false)) |
136 return true; | 137 return true; |
137 } | 138 } |
138 return false; | 139 return false; |
139 } | 140 } |
140 | 141 |
141 void QueryNodeWord::AppendWords(std::vector<base::string16>* words) const { | 142 void QueryNodeWord::AppendWords(std::vector<base::string16>* words) const { |
142 words->push_back(word_); | 143 words->push_back(word_); |
143 } | 144 } |
144 | 145 |
146 // A QueryNodePrefixWord is a single word in the query that can always be used | |
147 // in a prefix search. By comparison, short words as QueryNodeWord are only | |
148 // checked for exact matches. | |
149 class QueryNodePrefixWord : public QueryNodeWord { | |
Kibeom Kim (inactive)
2014/11/03 18:41:32
It looks like this class is identical to QueryNode
sky
2014/11/03 21:44:05
Why bother with a template and not a field, always
Kibeom Kim (inactive)
2014/11/04 01:28:19
Done.
Since the class is local to this file and n
| |
150 public: | |
151 explicit QueryNodePrefixWord(const base::string16& word); | |
152 ~QueryNodePrefixWord() override; | |
153 | |
154 // QueryNodeWord: | |
155 int AppendToSQLiteQuery(base::string16* query) const override; | |
156 bool Matches(const base::string16& word, bool exact) const override; | |
157 | |
158 DISALLOW_COPY_AND_ASSIGN(QueryNodePrefixWord); | |
159 }; | |
160 | |
161 QueryNodePrefixWord::QueryNodePrefixWord(const base::string16& word) | |
162 : QueryNodeWord(word) {} | |
163 | |
164 QueryNodePrefixWord::~QueryNodePrefixWord() {} | |
165 | |
166 int QueryNodePrefixWord::AppendToSQLiteQuery(base::string16* query) const { | |
167 query->append(this->word()); | |
168 | |
169 // Use prefix search if we're not literal and long enough. | |
170 if (!this->literal()) | |
171 *query += L'*'; | |
172 return 1; | |
173 } | |
174 | |
175 bool QueryNodePrefixWord::Matches(const base::string16& word, | |
176 bool exact) const { | |
177 if (exact) | |
178 return word == this->word(); | |
179 return word.size() >= this->word().size() && | |
180 (this->word().compare( | |
181 0, this->word().size(), word, 0, this->word().size()) == 0); | |
182 } | |
183 | |
145 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. | 184 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. |
146 class QueryNodeList : public QueryNode { | 185 class QueryNodeList : public QueryNode { |
147 public: | 186 public: |
148 QueryNodeList(); | 187 QueryNodeList(); |
149 ~QueryNodeList() override; | 188 ~QueryNodeList() override; |
150 | 189 |
151 QueryNodeStarVector* children() { return &children_; } | 190 QueryNodeStarVector* children() { return &children_; } |
152 | 191 |
153 void AddChild(QueryNode* node); | 192 void AddChild(QueryNode* node); |
154 | 193 |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
320 size_t minimum_length = 3; | 359 size_t minimum_length = 3; |
321 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) | 360 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) |
322 // because they 'behave like' Latin letters. Moreover, we should | 361 // because they 'behave like' Latin letters. Moreover, we should |
323 // normalize the former before reaching here. | 362 // normalize the former before reaching here. |
324 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) | 363 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) |
325 minimum_length = 2; | 364 minimum_length = 2; |
326 return word.size() >= minimum_length; | 365 return word.size() >= minimum_length; |
327 } | 366 } |
328 | 367 |
329 int QueryParser::ParseQuery(const base::string16& query, | 368 int QueryParser::ParseQuery(const base::string16& query, |
369 bool always_prefix_search, | |
330 base::string16* sqlite_query) { | 370 base::string16* sqlite_query) { |
331 QueryNodeList root; | 371 QueryNodeList root; |
332 if (!ParseQueryImpl(query, &root)) | 372 if (!ParseQueryImpl(query, always_prefix_search, &root)) |
333 return 0; | 373 return 0; |
334 return root.AppendToSQLiteQuery(sqlite_query); | 374 return root.AppendToSQLiteQuery(sqlite_query); |
335 } | 375 } |
336 | 376 |
337 void QueryParser::ParseQueryWords(const base::string16& query, | 377 void QueryParser::ParseQueryWords(const base::string16& query, |
378 bool always_prefix_search, | |
338 std::vector<base::string16>* words) { | 379 std::vector<base::string16>* words) { |
339 QueryNodeList root; | 380 QueryNodeList root; |
340 if (!ParseQueryImpl(query, &root)) | 381 if (!ParseQueryImpl(query, always_prefix_search, &root)) |
341 return; | 382 return; |
342 root.AppendWords(words); | 383 root.AppendWords(words); |
343 } | 384 } |
344 | 385 |
345 void QueryParser::ParseQueryNodes(const base::string16& query, | 386 void QueryParser::ParseQueryNodes(const base::string16& query, |
387 bool always_prefix_search, | |
346 QueryNodeStarVector* nodes) { | 388 QueryNodeStarVector* nodes) { |
347 QueryNodeList root; | 389 QueryNodeList root; |
348 if (ParseQueryImpl(base::i18n::ToLower(query), &root)) | 390 if (ParseQueryImpl(base::i18n::ToLower(query), always_prefix_search, &root)) |
349 nodes->swap(*root.children()); | 391 nodes->swap(*root.children()); |
350 } | 392 } |
351 | 393 |
352 bool QueryParser::DoesQueryMatch(const base::string16& text, | 394 bool QueryParser::DoesQueryMatch(const base::string16& text, |
353 const QueryNodeStarVector& query_nodes, | 395 const QueryNodeStarVector& query_nodes, |
354 Snippet::MatchPositions* match_positions) { | 396 Snippet::MatchPositions* match_positions) { |
355 if (query_nodes.empty()) | 397 if (query_nodes.empty()) |
356 return false; | 398 return false; |
357 | 399 |
358 QueryWordVector query_words; | 400 QueryWordVector query_words; |
(...skipping 27 matching lines...) Expand all Loading... | |
386 return false; | 428 return false; |
387 | 429 |
388 for (size_t i = 0; i < query_nodes.size(); ++i) { | 430 for (size_t i = 0; i < query_nodes.size(); ++i) { |
389 if (!query_nodes[i]->HasMatchIn(query_words)) | 431 if (!query_nodes[i]->HasMatchIn(query_words)) |
390 return false; | 432 return false; |
391 } | 433 } |
392 return true; | 434 return true; |
393 } | 435 } |
394 | 436 |
395 bool QueryParser::ParseQueryImpl(const base::string16& query, | 437 bool QueryParser::ParseQueryImpl(const base::string16& query, |
438 bool always_prefix_search, | |
396 QueryNodeList* root) { | 439 QueryNodeList* root) { |
397 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); | 440 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); |
398 // TODO(evanm): support a locale here | 441 // TODO(evanm): support a locale here |
399 if (!iter.Init()) | 442 if (!iter.Init()) |
400 return false; | 443 return false; |
401 | 444 |
402 // To handle nesting, we maintain a stack of QueryNodeLists. | 445 // To handle nesting, we maintain a stack of QueryNodeLists. |
403 // The last element (back) of the stack contains the current, deepest node. | 446 // The last element (back) of the stack contains the current, deepest node. |
404 std::vector<QueryNodeList*> query_stack; | 447 std::vector<QueryNodeList*> query_stack; |
405 query_stack.push_back(root); | 448 query_stack.push_back(root); |
406 | 449 |
407 bool in_quotes = false; // whether we're currently in a quoted phrase | 450 bool in_quotes = false; // whether we're currently in a quoted phrase |
408 while (iter.Advance()) { | 451 while (iter.Advance()) { |
409 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It | 452 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It |
410 // is not necessarily a word, but could also be a sequence of punctuation | 453 // is not necessarily a word, but could also be a sequence of punctuation |
411 // or whitespace. | 454 // or whitespace. |
412 if (iter.IsWord()) { | 455 if (iter.IsWord()) { |
413 QueryNodeWord* word_node = new QueryNodeWord(iter.GetString()); | 456 QueryNodeWord* word_node = (always_prefix_search) |
457 ? new QueryNodePrefixWord(iter.GetString()) | |
458 : new QueryNodeWord(iter.GetString()); | |
414 if (in_quotes) | 459 if (in_quotes) |
415 word_node->set_literal(true); | 460 word_node->set_literal(true); |
416 query_stack.back()->AddChild(word_node); | 461 query_stack.back()->AddChild(word_node); |
417 } else { // Punctuation. | 462 } else { // Punctuation. |
418 if (IsQueryQuote(query[iter.prev()])) { | 463 if (IsQueryQuote(query[iter.prev()])) { |
419 if (!in_quotes) { | 464 if (!in_quotes) { |
420 QueryNodeList* quotes_node = new QueryNodePhrase; | 465 QueryNodeList* quotes_node = new QueryNodePhrase; |
421 query_stack.back()->AddChild(quotes_node); | 466 query_stack.back()->AddChild(quotes_node); |
422 query_stack.push_back(quotes_node); | 467 query_stack.push_back(quotes_node); |
423 in_quotes = true; | 468 in_quotes = true; |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
459 void QueryParser::SortAndCoalesceMatchPositions( | 504 void QueryParser::SortAndCoalesceMatchPositions( |
460 Snippet::MatchPositions* matches) { | 505 Snippet::MatchPositions* matches) { |
461 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); | 506 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); |
462 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove | 507 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove |
463 // from matches. | 508 // from matches. |
464 for (size_t i = 0; i < matches->size(); ++i) | 509 for (size_t i = 0; i < matches->size(); ++i) |
465 CoalesceMatchesFrom(i, matches); | 510 CoalesceMatchesFrom(i, matches); |
466 } | 511 } |
467 | 512 |
468 } // namespace query_parser | 513 } // namespace query_parser |
OLD | NEW |