| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/query_parser/query_parser.h" | 5 #include "components/query_parser/query_parser.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
| 10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
| 11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/macros.h" | 13 #include "base/macros.h" |
| 14 #include "base/stl_util.h" | 14 #include "base/memory/ptr_util.h" |
| 15 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
| 16 | 16 |
| 17 namespace query_parser { | 17 namespace query_parser { |
| 18 namespace { | 18 namespace { |
| 19 | 19 |
| 20 // Returns true if |mp1.first| is less than |mp2.first|. This is used to | 20 // Returns true if |mp1.first| is less than |mp2.first|. This is used to |
| 21 // sort match positions. | 21 // sort match positions. |
| 22 int CompareMatchPosition(const Snippet::MatchPosition& mp1, | 22 int CompareMatchPosition(const Snippet::MatchPosition& mp1, |
| 23 const Snippet::MatchPosition& mp2) { | 23 const Snippet::MatchPosition& mp2) { |
| 24 return mp1.first < mp2.first; | 24 return mp1.first < mp2.first; |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 149 void QueryNodeWord::AppendWords(std::vector<base::string16>* words) const { | 149 void QueryNodeWord::AppendWords(std::vector<base::string16>* words) const { |
| 150 words->push_back(word_); | 150 words->push_back(word_); |
| 151 } | 151 } |
| 152 | 152 |
| 153 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. | 153 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. |
| 154 class QueryNodeList : public QueryNode { | 154 class QueryNodeList : public QueryNode { |
| 155 public: | 155 public: |
| 156 QueryNodeList(); | 156 QueryNodeList(); |
| 157 ~QueryNodeList() override; | 157 ~QueryNodeList() override; |
| 158 | 158 |
| 159 QueryNodeStarVector* children() { return &children_; } | 159 QueryNodeVector* children() { return &children_; } |
| 160 | 160 |
| 161 void AddChild(QueryNode* node); | 161 void AddChild(std::unique_ptr<QueryNode> node); |
| 162 | 162 |
| 163 // Remove empty subnodes left over from other parsing. | 163 // Remove empty subnodes left over from other parsing. |
| 164 void RemoveEmptySubnodes(); | 164 void RemoveEmptySubnodes(); |
| 165 | 165 |
| 166 // QueryNode: | 166 // QueryNode: |
| 167 int AppendToSQLiteQuery(base::string16* query) const override; | 167 int AppendToSQLiteQuery(base::string16* query) const override; |
| 168 bool IsWord() const override; | 168 bool IsWord() const override; |
| 169 bool Matches(const base::string16& word, bool exact) const override; | 169 bool Matches(const base::string16& word, bool exact) const override; |
| 170 bool HasMatchIn(const QueryWordVector& words, | 170 bool HasMatchIn(const QueryWordVector& words, |
| 171 Snippet::MatchPositions* match_positions) const override; | 171 Snippet::MatchPositions* match_positions) const override; |
| 172 bool HasMatchIn(const QueryWordVector& words) const override; | 172 bool HasMatchIn(const QueryWordVector& words) const override; |
| 173 void AppendWords(std::vector<base::string16>* words) const override; | 173 void AppendWords(std::vector<base::string16>* words) const override; |
| 174 | 174 |
| 175 protected: | 175 protected: |
| 176 int AppendChildrenToString(base::string16* query) const; | 176 int AppendChildrenToString(base::string16* query) const; |
| 177 | 177 |
| 178 QueryNodeStarVector children_; | 178 QueryNodeVector children_; |
| 179 | 179 |
| 180 private: | 180 private: |
| 181 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); | 181 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); |
| 182 }; | 182 }; |
| 183 | 183 |
| 184 QueryNodeList::QueryNodeList() {} | 184 QueryNodeList::QueryNodeList() {} |
| 185 | 185 |
| 186 QueryNodeList::~QueryNodeList() { | 186 QueryNodeList::~QueryNodeList() { |
| 187 base::STLDeleteElements(&children_); | |
| 188 } | 187 } |
| 189 | 188 |
| 190 void QueryNodeList::AddChild(QueryNode* node) { | 189 void QueryNodeList::AddChild(std::unique_ptr<QueryNode> node) { |
| 191 children_.push_back(node); | 190 children_.push_back(std::move(node)); |
| 192 } | 191 } |
| 193 | 192 |
| 194 void QueryNodeList::RemoveEmptySubnodes() { | 193 void QueryNodeList::RemoveEmptySubnodes() { |
| 195 for (size_t i = 0; i < children_.size(); ++i) { | 194 for (size_t i = 0; i < children_.size(); ++i) { |
| 196 if (children_[i]->IsWord()) | 195 if (children_[i]->IsWord()) |
| 197 continue; | 196 continue; |
| 198 | 197 |
| 199 QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i]); | 198 QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i].get()); |
| 200 list_node->RemoveEmptySubnodes(); | 199 list_node->RemoveEmptySubnodes(); |
| 201 if (list_node->children()->empty()) { | 200 if (list_node->children()->empty()) { |
| 202 children_.erase(children_.begin() + i); | 201 children_.erase(children_.begin() + i); |
| 203 --i; | 202 --i; |
| 204 delete list_node; | |
| 205 } | 203 } |
| 206 } | 204 } |
| 207 } | 205 } |
| 208 | 206 |
| 209 int QueryNodeList::AppendToSQLiteQuery(base::string16* query) const { | 207 int QueryNodeList::AppendToSQLiteQuery(base::string16* query) const { |
| 210 return AppendChildrenToString(query); | 208 return AppendChildrenToString(query); |
| 211 } | 209 } |
| 212 | 210 |
| 213 bool QueryNodeList::IsWord() const { | 211 bool QueryNodeList::IsWord() const { |
| 214 return false; | 212 return false; |
| (...skipping 15 matching lines...) Expand all Loading... |
| 230 return false; | 228 return false; |
| 231 } | 229 } |
| 232 | 230 |
| 233 void QueryNodeList::AppendWords(std::vector<base::string16>* words) const { | 231 void QueryNodeList::AppendWords(std::vector<base::string16>* words) const { |
| 234 for (size_t i = 0; i < children_.size(); ++i) | 232 for (size_t i = 0; i < children_.size(); ++i) |
| 235 children_[i]->AppendWords(words); | 233 children_[i]->AppendWords(words); |
| 236 } | 234 } |
| 237 | 235 |
| 238 int QueryNodeList::AppendChildrenToString(base::string16* query) const { | 236 int QueryNodeList::AppendChildrenToString(base::string16* query) const { |
| 239 int num_words = 0; | 237 int num_words = 0; |
| 240 for (QueryNodeStarVector::const_iterator node = children_.begin(); | 238 for (auto node = children_.begin(); node != children_.end(); ++node) { |
| 241 node != children_.end(); ++node) { | |
| 242 if (node != children_.begin()) | 239 if (node != children_.begin()) |
| 243 query->push_back(L' '); | 240 query->push_back(L' '); |
| 244 num_words += (*node)->AppendToSQLiteQuery(query); | 241 num_words += (*node)->AppendToSQLiteQuery(query); |
| 245 } | 242 } |
| 246 return num_words; | 243 return num_words; |
| 247 } | 244 } |
| 248 | 245 |
| 249 // A QueryNodePhrase is a phrase query ("quoted"). | 246 // A QueryNodePhrase is a phrase query ("quoted"). |
| 250 class QueryNodePhrase : public QueryNodeList { | 247 class QueryNodePhrase : public QueryNodeList { |
| 251 public: | 248 public: |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 351 MatchingAlgorithm matching_algorithm, | 348 MatchingAlgorithm matching_algorithm, |
| 352 std::vector<base::string16>* words) { | 349 std::vector<base::string16>* words) { |
| 353 QueryNodeList root; | 350 QueryNodeList root; |
| 354 if (!ParseQueryImpl(query, matching_algorithm, &root)) | 351 if (!ParseQueryImpl(query, matching_algorithm, &root)) |
| 355 return; | 352 return; |
| 356 root.AppendWords(words); | 353 root.AppendWords(words); |
| 357 } | 354 } |
| 358 | 355 |
| 359 void QueryParser::ParseQueryNodes(const base::string16& query, | 356 void QueryParser::ParseQueryNodes(const base::string16& query, |
| 360 MatchingAlgorithm matching_algorithm, | 357 MatchingAlgorithm matching_algorithm, |
| 361 QueryNodeStarVector* nodes) { | 358 QueryNodeVector* nodes) { |
| 362 QueryNodeList root; | 359 QueryNodeList root; |
| 363 if (ParseQueryImpl(base::i18n::ToLower(query), matching_algorithm, &root)) | 360 if (ParseQueryImpl(base::i18n::ToLower(query), matching_algorithm, &root)) |
| 364 nodes->swap(*root.children()); | 361 nodes->swap(*root.children()); |
| 365 } | 362 } |
| 366 | 363 |
| 367 bool QueryParser::DoesQueryMatch(const base::string16& text, | 364 bool QueryParser::DoesQueryMatch(const base::string16& text, |
| 368 const QueryNodeStarVector& query_nodes, | 365 const QueryNodeVector& query_nodes, |
| 369 Snippet::MatchPositions* match_positions) { | 366 Snippet::MatchPositions* match_positions) { |
| 370 if (query_nodes.empty()) | 367 if (query_nodes.empty()) |
| 371 return false; | 368 return false; |
| 372 | 369 |
| 373 QueryWordVector query_words; | 370 QueryWordVector query_words; |
| 374 base::string16 lower_text = base::i18n::ToLower(text); | 371 base::string16 lower_text = base::i18n::ToLower(text); |
| 375 ExtractQueryWords(lower_text, &query_words); | 372 ExtractQueryWords(lower_text, &query_words); |
| 376 | 373 |
| 377 if (query_words.empty()) | 374 if (query_words.empty()) |
| 378 return false; | 375 return false; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 389 // completely punt here. | 386 // completely punt here. |
| 390 match_positions->clear(); | 387 match_positions->clear(); |
| 391 } else { | 388 } else { |
| 392 SortAndCoalesceMatchPositions(&matches); | 389 SortAndCoalesceMatchPositions(&matches); |
| 393 match_positions->swap(matches); | 390 match_positions->swap(matches); |
| 394 } | 391 } |
| 395 return true; | 392 return true; |
| 396 } | 393 } |
| 397 | 394 |
| 398 bool QueryParser::DoesQueryMatch(const QueryWordVector& query_words, | 395 bool QueryParser::DoesQueryMatch(const QueryWordVector& query_words, |
| 399 const QueryNodeStarVector& query_nodes) { | 396 const QueryNodeVector& query_nodes) { |
| 400 if (query_nodes.empty() || query_words.empty()) | 397 if (query_nodes.empty() || query_words.empty()) |
| 401 return false; | 398 return false; |
| 402 | 399 |
| 403 for (size_t i = 0; i < query_nodes.size(); ++i) { | 400 for (size_t i = 0; i < query_nodes.size(); ++i) { |
| 404 if (!query_nodes[i]->HasMatchIn(query_words)) | 401 if (!query_nodes[i]->HasMatchIn(query_words)) |
| 405 return false; | 402 return false; |
| 406 } | 403 } |
| 407 return true; | 404 return true; |
| 408 } | 405 } |
| 409 | 406 |
| 410 bool QueryParser::ParseQueryImpl(const base::string16& query, | 407 bool QueryParser::ParseQueryImpl(const base::string16& query, |
| 411 MatchingAlgorithm matching_algorithm, | 408 MatchingAlgorithm matching_algorithm, |
| 412 QueryNodeList* root) { | 409 QueryNodeList* root) { |
| 413 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); | 410 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); |
| 414 // TODO(evanm): support a locale here | 411 // TODO(evanm): support a locale here |
| 415 if (!iter.Init()) | 412 if (!iter.Init()) |
| 416 return false; | 413 return false; |
| 417 | 414 |
| 418 // To handle nesting, we maintain a stack of QueryNodeLists. | 415 // To handle nesting, we maintain a stack of QueryNodeLists. |
| 419 // The last element (back) of the stack contains the current, deepest node. | 416 // The last element (back) of the stack contains the current, deepest node. |
| 420 std::vector<QueryNodeList*> query_stack; | 417 std::vector<QueryNodeList*> query_stack; |
| 421 query_stack.push_back(root); | 418 query_stack.push_back(root); |
| 422 | 419 |
| 423 bool in_quotes = false; // whether we're currently in a quoted phrase | 420 bool in_quotes = false; // whether we're currently in a quoted phrase |
| 424 while (iter.Advance()) { | 421 while (iter.Advance()) { |
| 425 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It | 422 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It |
| 426 // is not necessarily a word, but could also be a sequence of punctuation | 423 // is not necessarily a word, but could also be a sequence of punctuation |
| 427 // or whitespace. | 424 // or whitespace. |
| 428 if (iter.IsWord()) { | 425 if (iter.IsWord()) { |
| 429 QueryNodeWord* word_node = new QueryNodeWord(iter.GetString(), | 426 std::unique_ptr<QueryNodeWord> word_node = |
| 430 matching_algorithm); | 427 base::MakeUnique<QueryNodeWord>(iter.GetString(), matching_algorithm); |
| 431 if (in_quotes) | 428 if (in_quotes) |
| 432 word_node->set_literal(true); | 429 word_node->set_literal(true); |
| 433 query_stack.back()->AddChild(word_node); | 430 query_stack.back()->AddChild(std::move(word_node)); |
| 434 } else { // Punctuation. | 431 } else { // Punctuation. |
| 435 if (IsQueryQuote(query[iter.prev()])) { | 432 if (IsQueryQuote(query[iter.prev()])) { |
| 436 if (!in_quotes) { | 433 if (!in_quotes) { |
| 437 QueryNodeList* quotes_node = new QueryNodePhrase; | 434 std::unique_ptr<QueryNodeList> quotes_node = |
| 438 query_stack.back()->AddChild(quotes_node); | 435 base::MakeUnique<QueryNodePhrase>(); |
| 439 query_stack.push_back(quotes_node); | 436 QueryNodeList* quotes_node_ptr = quotes_node.get(); |
| 437 query_stack.back()->AddChild(std::move(quotes_node)); |
| 438 query_stack.push_back(quotes_node_ptr); |
| 440 in_quotes = true; | 439 in_quotes = true; |
| 441 } else { | 440 } else { |
| 442 query_stack.pop_back(); // Stop adding to the quoted phrase. | 441 query_stack.pop_back(); // Stop adding to the quoted phrase. |
| 443 in_quotes = false; | 442 in_quotes = false; |
| 444 } | 443 } |
| 445 } | 444 } |
| 446 } | 445 } |
| 447 } | 446 } |
| 448 | 447 |
| 449 root->RemoveEmptySubnodes(); | 448 root->RemoveEmptySubnodes(); |
| (...skipping 26 matching lines...) Expand all Loading... |
| 476 void QueryParser::SortAndCoalesceMatchPositions( | 475 void QueryParser::SortAndCoalesceMatchPositions( |
| 477 Snippet::MatchPositions* matches) { | 476 Snippet::MatchPositions* matches) { |
| 478 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); | 477 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); |
| 479 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove | 478 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove |
| 480 // from matches. | 479 // from matches. |
| 481 for (size_t i = 0; i < matches->size(); ++i) | 480 for (size_t i = 0; i < matches->size(); ++i) |
| 482 CoalesceMatchesFrom(i, matches); | 481 CoalesceMatchesFrom(i, matches); |
| 483 } | 482 } |
| 484 | 483 |
| 485 } // namespace query_parser | 484 } // namespace query_parser |
| OLD | NEW |