| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/query_parser/query_parser.h" | 5 #include "components/query_parser/query_parser.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
| 10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 59 } // namespace | 59 } // namespace |
| 60 | 60 |
| 61 // Inheritance structure: | 61 // Inheritance structure: |
| 62 // Queries are represented as trees of QueryNodes. | 62 // Queries are represented as trees of QueryNodes. |
| 63 // QueryNodes are either a collection of subnodes (a QueryNodeList) | 63 // QueryNodes are either a collection of subnodes (a QueryNodeList) |
| 64 // or a single word (a QueryNodeWord). | 64 // or a single word (a QueryNodeWord). |
| 65 | 65 |
| 66 // A QueryNodeWord is a single word in the query. | 66 // A QueryNodeWord is a single word in the query. |
| 67 class QueryNodeWord : public QueryNode { | 67 class QueryNodeWord : public QueryNode { |
| 68 public: | 68 public: |
| 69 explicit QueryNodeWord(const base::string16& word); | 69 explicit QueryNodeWord(const base::string16& word, |
| 70 MatchingAlgorithm matching_algorithm); |
| 70 ~QueryNodeWord() override; | 71 ~QueryNodeWord() override; |
| 71 | 72 |
| 72 const base::string16& word() const { return word_; } | 73 const base::string16& word() const { return word_; } |
| 73 | 74 |
| 75 bool literal() const { return literal_; }; |
| 74 void set_literal(bool literal) { literal_ = literal; } | 76 void set_literal(bool literal) { literal_ = literal; } |
| 75 | 77 |
| 76 // QueryNode: | 78 // QueryNode: |
| 77 int AppendToSQLiteQuery(base::string16* query) const override; | 79 int AppendToSQLiteQuery(base::string16* query) const override; |
| 78 bool IsWord() const override; | 80 bool IsWord() const override; |
| 79 bool Matches(const base::string16& word, bool exact) const override; | 81 bool Matches(const base::string16& word, bool exact) const override; |
| 80 bool HasMatchIn(const QueryWordVector& words, | 82 bool HasMatchIn(const QueryWordVector& words, |
| 81 Snippet::MatchPositions* match_positions) const override; | 83 Snippet::MatchPositions* match_positions) const override; |
| 82 bool HasMatchIn(const QueryWordVector& words) const override; | 84 bool HasMatchIn(const QueryWordVector& words) const override; |
| 83 void AppendWords(std::vector<base::string16>* words) const override; | 85 void AppendWords(std::vector<base::string16>* words) const override; |
| 84 | 86 |
| 85 private: | 87 private: |
| 86 base::string16 word_; | 88 base::string16 word_; |
| 87 bool literal_; | 89 bool literal_; |
| 90 const MatchingAlgorithm matching_algorithm_; |
| 88 | 91 |
| 89 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); | 92 DISALLOW_COPY_AND_ASSIGN(QueryNodeWord); |
| 90 }; | 93 }; |
| 91 | 94 |
| 92 QueryNodeWord::QueryNodeWord(const base::string16& word) | 95 QueryNodeWord::QueryNodeWord(const base::string16& word, |
| 96 MatchingAlgorithm matching_algorithm) |
| 93 : word_(word), | 97 : word_(word), |
| 94 literal_(false) {} | 98 literal_(false), |
| 99 matching_algorithm_(matching_algorithm) {} |
| 95 | 100 |
| 96 QueryNodeWord::~QueryNodeWord() {} | 101 QueryNodeWord::~QueryNodeWord() {} |
| 97 | 102 |
| 98 int QueryNodeWord::AppendToSQLiteQuery(base::string16* query) const { | 103 int QueryNodeWord::AppendToSQLiteQuery(base::string16* query) const { |
| 99 query->append(word_); | 104 query->append(word_); |
| 100 | 105 |
| 101 // Use prefix search if we're not literal and long enough. | 106 // Use prefix search if we're not literal and long enough. |
| 102 if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_)) | 107 if (!literal_ && |
| 108 QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_)) |
| 103 *query += L'*'; | 109 *query += L'*'; |
| 104 return 1; | 110 return 1; |
| 105 } | 111 } |
| 106 | 112 |
| 107 bool QueryNodeWord::IsWord() const { | 113 bool QueryNodeWord::IsWord() const { |
| 108 return true; | 114 return true; |
| 109 } | 115 } |
| 110 | 116 |
| 111 bool QueryNodeWord::Matches(const base::string16& word, bool exact) const { | 117 bool QueryNodeWord::Matches(const base::string16& word, bool exact) const { |
| 112 if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_)) | 118 if (exact || |
| 119 !QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_)) |
| 113 return word == word_; | 120 return word == word_; |
| 114 return word.size() >= word_.size() && | 121 return word.size() >= word_.size() && |
| 115 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); | 122 (word_.compare(0, word_.size(), word, 0, word_.size()) == 0); |
| 116 } | 123 } |
| 117 | 124 |
| 118 bool QueryNodeWord::HasMatchIn(const QueryWordVector& words, | 125 bool QueryNodeWord::HasMatchIn(const QueryWordVector& words, |
| 119 Snippet::MatchPositions* match_positions) const { | 126 Snippet::MatchPositions* match_positions) const { |
| 120 bool matched = false; | 127 bool matched = false; |
| 121 for (size_t i = 0; i < words.size(); ++i) { | 128 for (size_t i = 0; i < words.size(); ++i) { |
| 122 if (Matches(words[i].word, false)) { | 129 if (Matches(words[i].word, false)) { |
| (...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 308 | 315 |
| 309 bool QueryNodePhrase::HasMatchIn(const QueryWordVector& words) const { | 316 bool QueryNodePhrase::HasMatchIn(const QueryWordVector& words) const { |
| 310 const QueryWord* first_word; | 317 const QueryWord* first_word; |
| 311 const QueryWord* last_word; | 318 const QueryWord* last_word; |
| 312 return MatchesAll(words, &first_word, &last_word); | 319 return MatchesAll(words, &first_word, &last_word); |
| 313 } | 320 } |
| 314 | 321 |
| 315 QueryParser::QueryParser() {} | 322 QueryParser::QueryParser() {} |
| 316 | 323 |
| 317 // static | 324 // static |
| 318 bool QueryParser::IsWordLongEnoughForPrefixSearch(const base::string16& word) { | 325 bool QueryParser::IsWordLongEnoughForPrefixSearch( |
| 326 const base::string16& word, MatchingAlgorithm matching_algorithm) { |
| 327 if (matching_algorithm == MatchingAlgorithm::ALWAYS_PREFIX_SEARCH) |
| 328 return true; |
| 329 |
| 319 DCHECK(!word.empty()); | 330 DCHECK(!word.empty()); |
| 320 size_t minimum_length = 3; | 331 size_t minimum_length = 3; |
| 321 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) | 332 // We intentionally exclude Hangul Jamos (both Conjoining and compatibility) |
| 322 // because they 'behave like' Latin letters. Moreover, we should | 333 // because they 'behave like' Latin letters. Moreover, we should |
| 323 // normalize the former before reaching here. | 334 // normalize the former before reaching here. |
| 324 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) | 335 if (0xAC00 <= word[0] && word[0] <= 0xD7A3) |
| 325 minimum_length = 2; | 336 minimum_length = 2; |
| 326 return word.size() >= minimum_length; | 337 return word.size() >= minimum_length; |
| 327 } | 338 } |
| 328 | 339 |
| 329 int QueryParser::ParseQuery(const base::string16& query, | 340 int QueryParser::ParseQuery(const base::string16& query, |
| 341 MatchingAlgorithm matching_algorithm, |
| 330 base::string16* sqlite_query) { | 342 base::string16* sqlite_query) { |
| 331 QueryNodeList root; | 343 QueryNodeList root; |
| 332 if (!ParseQueryImpl(query, &root)) | 344 if (!ParseQueryImpl(query, matching_algorithm, &root)) |
| 333 return 0; | 345 return 0; |
| 334 return root.AppendToSQLiteQuery(sqlite_query); | 346 return root.AppendToSQLiteQuery(sqlite_query); |
| 335 } | 347 } |
| 336 | 348 |
| 337 void QueryParser::ParseQueryWords(const base::string16& query, | 349 void QueryParser::ParseQueryWords(const base::string16& query, |
| 350 MatchingAlgorithm matching_algorithm, |
| 338 std::vector<base::string16>* words) { | 351 std::vector<base::string16>* words) { |
| 339 QueryNodeList root; | 352 QueryNodeList root; |
| 340 if (!ParseQueryImpl(query, &root)) | 353 if (!ParseQueryImpl(query, matching_algorithm, &root)) |
| 341 return; | 354 return; |
| 342 root.AppendWords(words); | 355 root.AppendWords(words); |
| 343 } | 356 } |
| 344 | 357 |
| 345 void QueryParser::ParseQueryNodes(const base::string16& query, | 358 void QueryParser::ParseQueryNodes(const base::string16& query, |
| 359 MatchingAlgorithm matching_algorithm, |
| 346 QueryNodeStarVector* nodes) { | 360 QueryNodeStarVector* nodes) { |
| 347 QueryNodeList root; | 361 QueryNodeList root; |
| 348 if (ParseQueryImpl(base::i18n::ToLower(query), &root)) | 362 if (ParseQueryImpl(base::i18n::ToLower(query), matching_algorithm, &root)) |
| 349 nodes->swap(*root.children()); | 363 nodes->swap(*root.children()); |
| 350 } | 364 } |
| 351 | 365 |
| 352 bool QueryParser::DoesQueryMatch(const base::string16& text, | 366 bool QueryParser::DoesQueryMatch(const base::string16& text, |
| 353 const QueryNodeStarVector& query_nodes, | 367 const QueryNodeStarVector& query_nodes, |
| 354 Snippet::MatchPositions* match_positions) { | 368 Snippet::MatchPositions* match_positions) { |
| 355 if (query_nodes.empty()) | 369 if (query_nodes.empty()) |
| 356 return false; | 370 return false; |
| 357 | 371 |
| 358 QueryWordVector query_words; | 372 QueryWordVector query_words; |
| (...skipping 27 matching lines...) Expand all Loading... |
| 386 return false; | 400 return false; |
| 387 | 401 |
| 388 for (size_t i = 0; i < query_nodes.size(); ++i) { | 402 for (size_t i = 0; i < query_nodes.size(); ++i) { |
| 389 if (!query_nodes[i]->HasMatchIn(query_words)) | 403 if (!query_nodes[i]->HasMatchIn(query_words)) |
| 390 return false; | 404 return false; |
| 391 } | 405 } |
| 392 return true; | 406 return true; |
| 393 } | 407 } |
| 394 | 408 |
| 395 bool QueryParser::ParseQueryImpl(const base::string16& query, | 409 bool QueryParser::ParseQueryImpl(const base::string16& query, |
| 410 MatchingAlgorithm matching_algorithm, |
| 396 QueryNodeList* root) { | 411 QueryNodeList* root) { |
| 397 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); | 412 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); |
| 398 // TODO(evanm): support a locale here | 413 // TODO(evanm): support a locale here |
| 399 if (!iter.Init()) | 414 if (!iter.Init()) |
| 400 return false; | 415 return false; |
| 401 | 416 |
| 402 // To handle nesting, we maintain a stack of QueryNodeLists. | 417 // To handle nesting, we maintain a stack of QueryNodeLists. |
| 403 // The last element (back) of the stack contains the current, deepest node. | 418 // The last element (back) of the stack contains the current, deepest node. |
| 404 std::vector<QueryNodeList*> query_stack; | 419 std::vector<QueryNodeList*> query_stack; |
| 405 query_stack.push_back(root); | 420 query_stack.push_back(root); |
| 406 | 421 |
| 407 bool in_quotes = false; // whether we're currently in a quoted phrase | 422 bool in_quotes = false; // whether we're currently in a quoted phrase |
| 408 while (iter.Advance()) { | 423 while (iter.Advance()) { |
| 409 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It | 424 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It |
| 410 // is not necessarily a word, but could also be a sequence of punctuation | 425 // is not necessarily a word, but could also be a sequence of punctuation |
| 411 // or whitespace. | 426 // or whitespace. |
| 412 if (iter.IsWord()) { | 427 if (iter.IsWord()) { |
| 413 QueryNodeWord* word_node = new QueryNodeWord(iter.GetString()); | 428 QueryNodeWord* word_node = new QueryNodeWord(iter.GetString(), |
| 429 matching_algorithm); |
| 414 if (in_quotes) | 430 if (in_quotes) |
| 415 word_node->set_literal(true); | 431 word_node->set_literal(true); |
| 416 query_stack.back()->AddChild(word_node); | 432 query_stack.back()->AddChild(word_node); |
| 417 } else { // Punctuation. | 433 } else { // Punctuation. |
| 418 if (IsQueryQuote(query[iter.prev()])) { | 434 if (IsQueryQuote(query[iter.prev()])) { |
| 419 if (!in_quotes) { | 435 if (!in_quotes) { |
| 420 QueryNodeList* quotes_node = new QueryNodePhrase; | 436 QueryNodeList* quotes_node = new QueryNodePhrase; |
| 421 query_stack.back()->AddChild(quotes_node); | 437 query_stack.back()->AddChild(quotes_node); |
| 422 query_stack.push_back(quotes_node); | 438 query_stack.push_back(quotes_node); |
| 423 in_quotes = true; | 439 in_quotes = true; |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 459 void QueryParser::SortAndCoalesceMatchPositions( | 475 void QueryParser::SortAndCoalesceMatchPositions( |
| 460 Snippet::MatchPositions* matches) { | 476 Snippet::MatchPositions* matches) { |
| 461 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); | 477 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); |
| 462 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove | 478 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove |
| 463 // from matches. | 479 // from matches. |
| 464 for (size_t i = 0; i < matches->size(); ++i) | 480 for (size_t i = 0; i < matches->size(); ++i) |
| 465 CoalesceMatchesFrom(i, matches); | 481 CoalesceMatchesFrom(i, matches); |
| 466 } | 482 } |
| 467 | 483 |
| 468 } // namespace query_parser | 484 } // namespace query_parser |
| OLD | NEW |