OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/query_parser/query_parser.h" | 5 #include "components/query_parser/query_parser.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
59 } // namespace | 59 } // namespace |
60 | 60 |
61 // Inheritance structure: | 61 // Inheritance structure: |
62 // Queries are represented as trees of QueryNodes. | 62 // Queries are represented as trees of QueryNodes. |
63 // QueryNodes are either a collection of subnodes (a QueryNodeList) | 63 // QueryNodes are either a collection of subnodes (a QueryNodeList) |
64 // or a single word (a QueryNodeWord). | 64 // or a single word (a QueryNodeWord). |
65 | 65 |
66 // A QueryNodeWord is a single word in the query. | 66 // A QueryNodeWord is a single word in the query. |
67 class QueryNodeWord : public QueryNode { | 67 class QueryNodeWord : public QueryNode { |
68 public: | 68 public: |
69 explicit QueryNodeWord(const base::string16& word); | 69 explicit QueryNodeWord(const base::string16& word, |
| 70 MatchingAlgorithm matching_algorithm); |
70 ~QueryNodeWord() override; | 71 ~QueryNodeWord() override; |
71 | 72 |
72 const base::string16& word() const { return word_; } | 73 const base::string16& word() const { return word_; } |
73 | 74 |
| 75 bool literal() const { return literal_; }; |
74 void set_literal(bool literal) { literal_ = literal; } | 76 void set_literal(bool literal) { literal_ = literal; } |
75 | 77 |
76 // QueryNode: | 78 // QueryNode: |
77 int AppendToSQLiteQuery(base::string16* query) const override; | 79 int AppendToSQLiteQuery(base::string16* query) const override; |
78 bool IsWord() const override; | 80 bool IsWord() const override; |
79 bool Matches(const base::string16& word, bool exact) const override; | 81 bool Matches(const base::string16& word, bool exact) const override; |
80 bool HasMatchIn(const QueryWordVector& words, | 82 bool HasMatchIn(const QueryWordVector& words, |
81 Snippet::MatchPositions* match_positions) const override; | 83 Snippet::MatchPositions* match_positions) const override; |
82 bool HasMatchIn(const QueryWordVector& words) const override; | 84 bool HasMatchIn(const QueryWordVector& words) const override; |
83 void AppendWords(std::vector<base::string16>* words) const override; | 85 void AppendWords(std::vector<base::string16>* words) const override; |
84 | 86 |
85 private: | 87 private: |
86 base::string16 word_; | 88 base::string16 word_; |
87 bool literal_; | 89 bool literal_; |
| 90 const MatchingAlgorithm matching_algorithm_; |
88 | 91 |
89 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); | 92 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); |
90 }; | 93 }; |
91 | 94 |
92 QueryNodeWord::QueryNodeWord(const base::string16& word) | 95 QueryNodeWord::QueryNodeWord(const base::string16& word, |
| 96 MatchingAlgorithm matching_algorithm) |
93 : word_(word), | 97 : word_(word), |
94 literal_(false) {} | 98 literal_(false), |
| 99 matching_algorithm_(matching_algorithm) {} |
95 | 100 |
96 QueryNodeWord::~QueryNodeWord() {} | 101 QueryNodeWord::~QueryNodeWord() {} |
97 | 102 |
98 int QueryNodeWord::AppendToSQLiteQuery(base::string16* query) const { | 103 int QueryNodeWord::AppendToSQLiteQuery(base::string16* query) const { |
99 query->append(word_); | 104 query->append(word_); |
100 | 105 |
101 // Use prefix search if we're not literal and long enough. | 106 // Use prefix search if we're not literal and long enough. |
102 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) | 107 if (!literal_ && |
| 108 QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_)) |
103 *query += L'*'; | 109 *query += L'*'; |
104 return 1; | 110 return 1; |
105 } | 111 } |
106 | 112 |
107 bool QueryNodeWord::IsWord() const { | 113 bool QueryNodeWord::IsWord() const { |
108 return true; | 114 return true; |
109 } | 115 } |
110 | 116 |
111 bool QueryNodeWord::Matches(const base::string16& word, bool exact) const { | 117 bool QueryNodeWord::Matches(const base::string16& word, bool exact) const { |
112 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) | 118 if (exact || |
| 119 !QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_)) |
113 return word == word_; | 120 return word == word_; |
114 return word.size() >= word_.size() && | 121 return word.size() >= word_.size() && |
115 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); | 122 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); |
116 } | 123 } |
117 | 124 |
118 bool QueryNodeWord::HasMatchIn(const QueryWordVector& words, | 125 bool QueryNodeWord::HasMatchIn(const QueryWordVector& words, |
119 Snippet::MatchPositions* match_positions) const { | 126 Snippet::MatchPositions* match_positions) const { |
120 bool matched = false; | 127 bool matched = false; |
121 for (size_t i = 0; i < words.size(); ++i) { | 128 for (size_t i = 0; i < words.size(); ++i) { |
122 if (Matches(words[i].word, false)) { | 129 if (Matches(words[i].word, false)) { |
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
308 | 315 |
309 bool QueryNodePhrase::HasMatchIn(const QueryWordVector& words) const { | 316 bool QueryNodePhrase::HasMatchIn(const QueryWordVector& words) const { |
310 const QueryWord* first_word; | 317 const QueryWord* first_word; |
311 const QueryWord* last_word; | 318 const QueryWord* last_word; |
312 return MatchesAll(words, &first_word, &last_word); | 319 return MatchesAll(words, &first_word, &last_word); |
313 } | 320 } |
314 | 321 |
315 QueryParser::QueryParser() {} | 322 QueryParser::QueryParser() {} |
316 | 323 |
317 // static | 324 // static |
318 bool QueryParser::IsWordLongEnoughForPrefixSearch(const base::string16& word) { | 325 bool QueryParser::IsWordLongEnoughForPrefixSearch( |
| 326 const base::string16& word, MatchingAlgorithm matching_algorithm) { |
| 327 if (matching_algorithm == MatchingAlgorithm::ALWAYS_PREFIX_SEARCH) |
| 328 return true; |
| 329 |
319 DCHECK(!word.empty()); | 330 DCHECK(!word.empty()); |
320 size_t minimum_length = 3; | 331 size_t minimum_length = 3; |
321 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) | 332 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) |
322 // because they 'behave like' Latin letters. Moreover, we should | 333 // because they 'behave like' Latin letters. Moreover, we should |
323 // normalize the former before reaching here. | 334 // normalize the former before reaching here. |
324 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) | 335 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) |
325 minimum_length = 2; | 336 minimum_length = 2; |
326 return word.size() >= minimum_length; | 337 return word.size() >= minimum_length; |
327 } | 338 } |
328 | 339 |
329 int QueryParser::ParseQuery(const base::string16& query, | 340 int QueryParser::ParseQuery(const base::string16& query, |
| 341 MatchingAlgorithm matching_algorithm, |
330 base::string16* sqlite_query) { | 342 base::string16* sqlite_query) { |
331 QueryNodeList root; | 343 QueryNodeList root; |
332 if (!ParseQueryImpl(query, &root)) | 344 if (!ParseQueryImpl(query, matching_algorithm, &root)) |
333 return 0; | 345 return 0; |
334 return root.AppendToSQLiteQuery(sqlite_query); | 346 return root.AppendToSQLiteQuery(sqlite_query); |
335 } | 347 } |
336 | 348 |
337 void QueryParser::ParseQueryWords(const base::string16& query, | 349 void QueryParser::ParseQueryWords(const base::string16& query, |
| 350 MatchingAlgorithm matching_algorithm, |
338 std::vector<base::string16>* words) { | 351 std::vector<base::string16>* words) { |
339 QueryNodeList root; | 352 QueryNodeList root; |
340 if (!ParseQueryImpl(query, &root)) | 353 if (!ParseQueryImpl(query, matching_algorithm, &root)) |
341 return; | 354 return; |
342 root.AppendWords(words); | 355 root.AppendWords(words); |
343 } | 356 } |
344 | 357 |
345 void QueryParser::ParseQueryNodes(const base::string16& query, | 358 void QueryParser::ParseQueryNodes(const base::string16& query, |
| 359 MatchingAlgorithm matching_algorithm, |
346 QueryNodeStarVector* nodes) { | 360 QueryNodeStarVector* nodes) { |
347 QueryNodeList root; | 361 QueryNodeList root; |
348 if (ParseQueryImpl(base::i18n::ToLower(query), &root)) | 362 if (ParseQueryImpl(base::i18n::ToLower(query), matching_algorithm, &root)) |
349 nodes->swap(*root.children()); | 363 nodes->swap(*root.children()); |
350 } | 364 } |
351 | 365 |
352 bool QueryParser::DoesQueryMatch(const base::string16& text, | 366 bool QueryParser::DoesQueryMatch(const base::string16& text, |
353 const QueryNodeStarVector& query_nodes, | 367 const QueryNodeStarVector& query_nodes, |
354 Snippet::MatchPositions* match_positions) { | 368 Snippet::MatchPositions* match_positions) { |
355 if (query_nodes.empty()) | 369 if (query_nodes.empty()) |
356 return false; | 370 return false; |
357 | 371 |
358 QueryWordVector query_words; | 372 QueryWordVector query_words; |
(...skipping 27 matching lines...) Expand all Loading... |
386 return false; | 400 return false; |
387 | 401 |
388 for (size_t i = 0; i < query_nodes.size(); ++i) { | 402 for (size_t i = 0; i < query_nodes.size(); ++i) { |
389 if (!query_nodes[i]->HasMatchIn(query_words)) | 403 if (!query_nodes[i]->HasMatchIn(query_words)) |
390 return false; | 404 return false; |
391 } | 405 } |
392 return true; | 406 return true; |
393 } | 407 } |
394 | 408 |
395 bool QueryParser::ParseQueryImpl(const base::string16& query, | 409 bool QueryParser::ParseQueryImpl(const base::string16& query, |
| 410 MatchingAlgorithm matching_algorithm, |
396 QueryNodeList* root) { | 411 QueryNodeList* root) { |
397 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); | 412 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); |
398 // TODO(evanm): support a locale here | 413 // TODO(evanm): support a locale here |
399 if (!iter.Init()) | 414 if (!iter.Init()) |
400 return false; | 415 return false; |
401 | 416 |
402 // To handle nesting, we maintain a stack of QueryNodeLists. | 417 // To handle nesting, we maintain a stack of QueryNodeLists. |
403 // The last element (back) of the stack contains the current, deepest node. | 418 // The last element (back) of the stack contains the current, deepest node. |
404 std::vector<QueryNodeList*> query_stack; | 419 std::vector<QueryNodeList*> query_stack; |
405 query_stack.push_back(root); | 420 query_stack.push_back(root); |
406 | 421 |
407 bool in_quotes = false; // whether we're currently in a quoted phrase | 422 bool in_quotes = false; // whether we're currently in a quoted phrase |
408 while (iter.Advance()) { | 423 while (iter.Advance()) { |
409 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It | 424 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It |
410 // is not necessarily a word, but could also be a sequence of punctuation | 425 // is not necessarily a word, but could also be a sequence of punctuation |
411 // or whitespace. | 426 // or whitespace. |
412 if (iter.IsWord()) { | 427 if (iter.IsWord()) { |
413 QueryNodeWord* word_node = new QueryNodeWord(iter.GetString()); | 428 QueryNodeWord* word_node = new QueryNodeWord(iter.GetString(), |
| 429 matching_algorithm); |
414 if (in_quotes) | 430 if (in_quotes) |
415 word_node->set_literal(true); | 431 word_node->set_literal(true); |
416 query_stack.back()->AddChild(word_node); | 432 query_stack.back()->AddChild(word_node); |
417 } else { // Punctuation. | 433 } else { // Punctuation. |
418 if (IsQueryQuote(query[iter.prev()])) { | 434 if (IsQueryQuote(query[iter.prev()])) { |
419 if (!in_quotes) { | 435 if (!in_quotes) { |
420 QueryNodeList* quotes_node = new QueryNodePhrase; | 436 QueryNodeList* quotes_node = new QueryNodePhrase; |
421 query_stack.back()->AddChild(quotes_node); | 437 query_stack.back()->AddChild(quotes_node); |
422 query_stack.push_back(quotes_node); | 438 query_stack.push_back(quotes_node); |
423 in_quotes = true; | 439 in_quotes = true; |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
459 void QueryParser::SortAndCoalesceMatchPositions( | 475 void QueryParser::SortAndCoalesceMatchPositions( |
460 Snippet::MatchPositions* matches) { | 476 Snippet::MatchPositions* matches) { |
461 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); | 477 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); |
462 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove | 478 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove |
463 // from matches. | 479 // from matches. |
464 for (size_t i = 0; i < matches->size(); ++i) | 480 for (size_t i = 0; i < matches->size(); ++i) |
465 CoalesceMatchesFrom(i, matches); | 481 CoalesceMatchesFrom(i, matches); |
466 } | 482 } |
467 | 483 |
468 } // namespace query_parser | 484 } // namespace query_parser |
OLD | NEW |