OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/query_parser/query_parser.h" | 5 #include "components/query_parser/query_parser.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
10 #include "base/i18n/break_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
13 #include "base/macros.h" | 13 #include "base/macros.h" |
14 #include "base/stl_util.h" | 14 #include "base/memory/ptr_util.h" |
15 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
16 | 16 |
17 namespace query_parser { | 17 namespace query_parser { |
18 namespace { | 18 namespace { |
19 | 19 |
20 // Returns true if |mp1.first| is less than |mp2.first|. This is used to | 20 // Returns true if |mp1.first| is less than |mp2.first|. This is used to |
21 // sort match positions. | 21 // sort match positions. |
22 int CompareMatchPosition(const Snippet::MatchPosition& mp1, | 22 int CompareMatchPosition(const Snippet::MatchPosition& mp1, |
23 const Snippet::MatchPosition& mp2) { | 23 const Snippet::MatchPosition& mp2) { |
24 return mp1.first < mp2.first; | 24 return mp1.first < mp2.first; |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
149 void QueryNodeWord::AppendWords(std::vector<base::string16>* words) const { | 149 void QueryNodeWord::AppendWords(std::vector<base::string16>* words) const { |
150 words->push_back(word_); | 150 words->push_back(word_); |
151 } | 151 } |
152 | 152 |
153 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. | 153 // A QueryNodeList has a collection of QueryNodes which are deleted in the end. |
154 class QueryNodeList : public QueryNode { | 154 class QueryNodeList : public QueryNode { |
155 public: | 155 public: |
156 QueryNodeList(); | 156 QueryNodeList(); |
157 ~QueryNodeList() override; | 157 ~QueryNodeList() override; |
158 | 158 |
159 QueryNodeStarVector* children() { return &children_; } | 159 QueryNodeVector* children() { return &children_; } |
160 | 160 |
161 void AddChild(QueryNode* node); | 161 void AddChild(std::unique_ptr<QueryNode> node); |
162 | 162 |
163 // Remove empty subnodes left over from other parsing. | 163 // Remove empty subnodes left over from other parsing. |
164 void RemoveEmptySubnodes(); | 164 void RemoveEmptySubnodes(); |
165 | 165 |
166 // QueryNode: | 166 // QueryNode: |
167 int AppendToSQLiteQuery(base::string16* query) const override; | 167 int AppendToSQLiteQuery(base::string16* query) const override; |
168 bool IsWord() const override; | 168 bool IsWord() const override; |
169 bool Matches(const base::string16& word, bool exact) const override; | 169 bool Matches(const base::string16& word, bool exact) const override; |
170 bool HasMatchIn(const QueryWordVector& words, | 170 bool HasMatchIn(const QueryWordVector& words, |
171 Snippet::MatchPositions* match_positions) const override; | 171 Snippet::MatchPositions* match_positions) const override; |
172 bool HasMatchIn(const QueryWordVector& words) const override; | 172 bool HasMatchIn(const QueryWordVector& words) const override; |
173 void AppendWords(std::vector<base::string16>* words) const override; | 173 void AppendWords(std::vector<base::string16>* words) const override; |
174 | 174 |
175 protected: | 175 protected: |
176 int AppendChildrenToString(base::string16* query) const; | 176 int AppendChildrenToString(base::string16* query) const; |
177 | 177 |
178 QueryNodeStarVector children_; | 178 QueryNodeVector children_; |
179 | 179 |
180 private: | 180 private: |
181 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); | 181 DISALLOW_COPY_AND_ASSIGN(QueryNodeList); |
182 }; | 182 }; |
183 | 183 |
184 QueryNodeList::QueryNodeList() {} | 184 QueryNodeList::QueryNodeList() {} |
185 | 185 |
186 QueryNodeList::~QueryNodeList() { | 186 QueryNodeList::~QueryNodeList() { |
187 base::STLDeleteElements(&children_); | |
188 } | 187 } |
189 | 188 |
190 void QueryNodeList::AddChild(QueryNode* node) { | 189 void QueryNodeList::AddChild(std::unique_ptr<QueryNode> node) { |
191 children_.push_back(node); | 190 children_.push_back(std::move(node)); |
192 } | 191 } |
193 | 192 |
194 void QueryNodeList::RemoveEmptySubnodes() { | 193 void QueryNodeList::RemoveEmptySubnodes() { |
195 for (size_t i = 0; i < children_.size(); ++i) { | 194 for (size_t i = 0; i < children_.size(); ++i) { |
196 if (children_[i]->IsWord()) | 195 if (children_[i]->IsWord()) |
197 continue; | 196 continue; |
198 | 197 |
199 QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i]); | 198 QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i].get()); |
200 list_node->RemoveEmptySubnodes(); | 199 list_node->RemoveEmptySubnodes(); |
201 if (list_node->children()->empty()) { | 200 if (list_node->children()->empty()) { |
202 children_.erase(children_.begin() + i); | 201 children_.erase(children_.begin() + i); |
203 --i; | 202 --i; |
204 delete list_node; | |
205 } | 203 } |
206 } | 204 } |
207 } | 205 } |
208 | 206 |
209 int QueryNodeList::AppendToSQLiteQuery(base::string16* query) const { | 207 int QueryNodeList::AppendToSQLiteQuery(base::string16* query) const { |
210 return AppendChildrenToString(query); | 208 return AppendChildrenToString(query); |
211 } | 209 } |
212 | 210 |
213 bool QueryNodeList::IsWord() const { | 211 bool QueryNodeList::IsWord() const { |
214 return false; | 212 return false; |
(...skipping 15 matching lines...) Expand all Loading... |
230 return false; | 228 return false; |
231 } | 229 } |
232 | 230 |
233 void QueryNodeList::AppendWords(std::vector<base::string16>* words) const { | 231 void QueryNodeList::AppendWords(std::vector<base::string16>* words) const { |
234 for (size_t i = 0; i < children_.size(); ++i) | 232 for (size_t i = 0; i < children_.size(); ++i) |
235 children_[i]->AppendWords(words); | 233 children_[i]->AppendWords(words); |
236 } | 234 } |
237 | 235 |
238 int QueryNodeList::AppendChildrenToString(base::string16* query) const { | 236 int QueryNodeList::AppendChildrenToString(base::string16* query) const { |
239 int num_words = 0; | 237 int num_words = 0; |
240 for (QueryNodeStarVector::const_iterator node = children_.begin(); | 238 for (auto node = children_.begin(); node != children_.end(); ++node) { |
241 node != children_.end(); ++node) { | |
242 if (node != children_.begin()) | 239 if (node != children_.begin()) |
243 query->push_back(L' '); | 240 query->push_back(L' '); |
244 num_words += (*node)->AppendToSQLiteQuery(query); | 241 num_words += (*node)->AppendToSQLiteQuery(query); |
245 } | 242 } |
246 return num_words; | 243 return num_words; |
247 } | 244 } |
248 | 245 |
249 // A QueryNodePhrase is a phrase query ("quoted"). | 246 // A QueryNodePhrase is a phrase query ("quoted"). |
250 class QueryNodePhrase : public QueryNodeList { | 247 class QueryNodePhrase : public QueryNodeList { |
251 public: | 248 public: |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
351 MatchingAlgorithm matching_algorithm, | 348 MatchingAlgorithm matching_algorithm, |
352 std::vector<base::string16>* words) { | 349 std::vector<base::string16>* words) { |
353 QueryNodeList root; | 350 QueryNodeList root; |
354 if (!ParseQueryImpl(query, matching_algorithm, &root)) | 351 if (!ParseQueryImpl(query, matching_algorithm, &root)) |
355 return; | 352 return; |
356 root.AppendWords(words); | 353 root.AppendWords(words); |
357 } | 354 } |
358 | 355 |
359 void QueryParser::ParseQueryNodes(const base::string16& query, | 356 void QueryParser::ParseQueryNodes(const base::string16& query, |
360 MatchingAlgorithm matching_algorithm, | 357 MatchingAlgorithm matching_algorithm, |
361 QueryNodeStarVector* nodes) { | 358 QueryNodeVector* nodes) { |
362 QueryNodeList root; | 359 QueryNodeList root; |
363 if (ParseQueryImpl(base::i18n::ToLower(query), matching_algorithm, &root)) | 360 if (ParseQueryImpl(base::i18n::ToLower(query), matching_algorithm, &root)) |
364 nodes->swap(*root.children()); | 361 nodes->swap(*root.children()); |
365 } | 362 } |
366 | 363 |
367 bool QueryParser::DoesQueryMatch(const base::string16& text, | 364 bool QueryParser::DoesQueryMatch(const base::string16& text, |
368 const QueryNodeStarVector& query_nodes, | 365 const QueryNodeVector& query_nodes, |
369 Snippet::MatchPositions* match_positions) { | 366 Snippet::MatchPositions* match_positions) { |
370 if (query_nodes.empty()) | 367 if (query_nodes.empty()) |
371 return false; | 368 return false; |
372 | 369 |
373 QueryWordVector query_words; | 370 QueryWordVector query_words; |
374 base::string16 lower_text = base::i18n::ToLower(text); | 371 base::string16 lower_text = base::i18n::ToLower(text); |
375 ExtractQueryWords(lower_text, &query_words); | 372 ExtractQueryWords(lower_text, &query_words); |
376 | 373 |
377 if (query_words.empty()) | 374 if (query_words.empty()) |
378 return false; | 375 return false; |
(...skipping 10 matching lines...) Expand all Loading... |
389 // completely punt here. | 386 // completely punt here. |
390 match_positions->clear(); | 387 match_positions->clear(); |
391 } else { | 388 } else { |
392 SortAndCoalesceMatchPositions(&matches); | 389 SortAndCoalesceMatchPositions(&matches); |
393 match_positions->swap(matches); | 390 match_positions->swap(matches); |
394 } | 391 } |
395 return true; | 392 return true; |
396 } | 393 } |
397 | 394 |
398 bool QueryParser::DoesQueryMatch(const QueryWordVector& query_words, | 395 bool QueryParser::DoesQueryMatch(const QueryWordVector& query_words, |
399 const QueryNodeStarVector& query_nodes) { | 396 const QueryNodeVector& query_nodes) { |
400 if (query_nodes.empty() || query_words.empty()) | 397 if (query_nodes.empty() || query_words.empty()) |
401 return false; | 398 return false; |
402 | 399 |
403 for (size_t i = 0; i < query_nodes.size(); ++i) { | 400 for (size_t i = 0; i < query_nodes.size(); ++i) { |
404 if (!query_nodes[i]->HasMatchIn(query_words)) | 401 if (!query_nodes[i]->HasMatchIn(query_words)) |
405 return false; | 402 return false; |
406 } | 403 } |
407 return true; | 404 return true; |
408 } | 405 } |
409 | 406 |
410 bool QueryParser::ParseQueryImpl(const base::string16& query, | 407 bool QueryParser::ParseQueryImpl(const base::string16& query, |
411 MatchingAlgorithm matching_algorithm, | 408 MatchingAlgorithm matching_algorithm, |
412 QueryNodeList* root) { | 409 QueryNodeList* root) { |
413 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); | 410 base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD); |
414 // TODO(evanm): support a locale here | 411 // TODO(evanm): support a locale here |
415 if (!iter.Init()) | 412 if (!iter.Init()) |
416 return false; | 413 return false; |
417 | 414 |
418 // To handle nesting, we maintain a stack of QueryNodeLists. | 415 // To handle nesting, we maintain a stack of QueryNodeLists. |
419 // The last element (back) of the stack contains the current, deepest node. | 416 // The last element (back) of the stack contains the current, deepest node. |
420 std::vector<QueryNodeList*> query_stack; | 417 std::vector<QueryNodeList*> query_stack; |
421 query_stack.push_back(root); | 418 query_stack.push_back(root); |
422 | 419 |
423 bool in_quotes = false; // whether we're currently in a quoted phrase | 420 bool in_quotes = false; // whether we're currently in a quoted phrase |
424 while (iter.Advance()) { | 421 while (iter.Advance()) { |
425 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It | 422 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It |
426 // is not necessarily a word, but could also be a sequence of punctuation | 423 // is not necessarily a word, but could also be a sequence of punctuation |
427 // or whitespace. | 424 // or whitespace. |
428 if (iter.IsWord()) { | 425 if (iter.IsWord()) { |
429 QueryNodeWord* word_node = new QueryNodeWord(iter.GetString(), | 426 std::unique_ptr<QueryNodeWord> word_node = |
430 matching_algorithm); | 427 base::MakeUnique<QueryNodeWord>(iter.GetString(), matching_algorithm); |
431 if (in_quotes) | 428 if (in_quotes) |
432 word_node->set_literal(true); | 429 word_node->set_literal(true); |
433 query_stack.back()->AddChild(word_node); | 430 query_stack.back()->AddChild(std::move(word_node)); |
434 } else { // Punctuation. | 431 } else { // Punctuation. |
435 if (IsQueryQuote(query[iter.prev()])) { | 432 if (IsQueryQuote(query[iter.prev()])) { |
436 if (!in_quotes) { | 433 if (!in_quotes) { |
437 QueryNodeList* quotes_node = new QueryNodePhrase; | 434 std::unique_ptr<QueryNodeList> quotes_node = |
438 query_stack.back()->AddChild(quotes_node); | 435 base::MakeUnique<QueryNodePhrase>(); |
439 query_stack.push_back(quotes_node); | 436 QueryNodeList* quotes_node_ptr = quotes_node.get(); |
| 437 query_stack.back()->AddChild(std::move(quotes_node)); |
| 438 query_stack.push_back(quotes_node_ptr); |
440 in_quotes = true; | 439 in_quotes = true; |
441 } else { | 440 } else { |
442 query_stack.pop_back(); // Stop adding to the quoted phrase. | 441 query_stack.pop_back(); // Stop adding to the quoted phrase. |
443 in_quotes = false; | 442 in_quotes = false; |
444 } | 443 } |
445 } | 444 } |
446 } | 445 } |
447 } | 446 } |
448 | 447 |
449 root->RemoveEmptySubnodes(); | 448 root->RemoveEmptySubnodes(); |
(...skipping 26 matching lines...) Expand all Loading... |
476 void QueryParser::SortAndCoalesceMatchPositions( | 475 void QueryParser::SortAndCoalesceMatchPositions( |
477 Snippet::MatchPositions* matches) { | 476 Snippet::MatchPositions* matches) { |
478 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); | 477 std::sort(matches->begin(), matches->end(), &CompareMatchPosition); |
479 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove | 478 // WARNING: we don't use iterator here as CoalesceMatchesFrom may remove |
480 // from matches. | 479 // from matches. |
481 for (size_t i = 0; i < matches->size(); ++i) | 480 for (size_t i = 0; i < matches->size(); ++i) |
482 CoalesceMatchesFrom(i, matches); | 481 CoalesceMatchesFrom(i, matches); |
483 } | 482 } |
484 | 483 |
485 } // namespace query_parser | 484 } // namespace query_parser |
OLD | NEW |