OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/query_parser.h" | 5 #include "chrome/browser/history/query_parser.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "app/l10n_util.h" | 9 #include "app/l10n_util.h" |
10 #include "base/i18n/word_iterator.h" | 10 #include "base/i18n/break_iterator.h" |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "base/scoped_vector.h" | 12 #include "base/scoped_vector.h" |
13 #include "base/string_util.h" | 13 #include "base/string_util.h" |
14 #include "base/utf_string_conversions.h" | 14 #include "base/utf_string_conversions.h" |
15 #include "unicode/uscript.h" | 15 #include "unicode/uscript.h" |
16 | 16 |
17 namespace { | 17 namespace { |
18 | 18 |
19 // Returns true if |mp1.first| is less than |mp2.first|. This is used to | 19 // Returns true if |mp1.first| is less than |mp2.first|. This is used to |
20 // sort match positions. | 20 // sort match positions. |
(...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
315 match_positions->clear(); | 315 match_positions->clear(); |
316 } else { | 316 } else { |
317 CoalseAndSortMatchPositions(&matches); | 317 CoalseAndSortMatchPositions(&matches); |
318 match_positions->swap(matches); | 318 match_positions->swap(matches); |
319 } | 319 } |
320 return true; | 320 return true; |
321 } | 321 } |
322 | 322 |
323 bool QueryParser::ParseQueryImpl(const string16& query, | 323 bool QueryParser::ParseQueryImpl(const string16& query, |
324 QueryNodeList* root) { | 324 QueryNodeList* root) { |
325 WordIterator iter(&query, WordIterator::BREAK_WORD); | 325 base::BreakIterator iter(&query, base::BreakIterator::BREAK_WORD); |
326 // TODO(evanm): support a locale here | 326 // TODO(evanm): support a locale here |
327 if (!iter.Init()) | 327 if (!iter.Init()) |
328 return false; | 328 return false; |
329 | 329 |
330 // To handle nesting, we maintain a stack of QueryNodeLists. | 330 // To handle nesting, we maintain a stack of QueryNodeLists. |
331 // The last element (back) of the stack contains the current, deepest node. | 331 // The last element (back) of the stack contains the current, deepest node. |
332 std::vector<QueryNodeList*> query_stack; | 332 std::vector<QueryNodeList*> query_stack; |
333 query_stack.push_back(root); | 333 query_stack.push_back(root); |
334 | 334 |
335 bool in_quotes = false; // whether we're currently in a quoted phrase | 335 bool in_quotes = false; // whether we're currently in a quoted phrase |
336 while (iter.Advance()) { | 336 while (iter.Advance()) { |
337 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It | 337 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It |
338 // is not necessarily a word, but could also be a sequence of punctuation | 338 // is not necessarily a word, but could also be a sequence of punctuation |
339 // or whitespace. | 339 // or whitespace. |
340 if (iter.IsWord()) { | 340 if (iter.IsWord()) { |
341 string16 word = iter.GetWord(); | 341 string16 word = iter.GetString(); |
342 | 342 |
343 QueryNodeWord* word_node = new QueryNodeWord(word); | 343 QueryNodeWord* word_node = new QueryNodeWord(word); |
344 if (in_quotes) | 344 if (in_quotes) |
345 word_node->set_literal(true); | 345 word_node->set_literal(true); |
346 query_stack.back()->AddChild(word_node); | 346 query_stack.back()->AddChild(word_node); |
347 } else { // Punctuation. | 347 } else { // Punctuation. |
348 if (IsQueryQuote(query[iter.prev()])) { | 348 if (IsQueryQuote(query[iter.prev()])) { |
349 if (!in_quotes) { | 349 if (!in_quotes) { |
350 QueryNodeList* quotes_node = new QueryNodePhrase; | 350 QueryNodeList* quotes_node = new QueryNodePhrase; |
351 query_stack.back()->AddChild(quotes_node); | 351 query_stack.back()->AddChild(quotes_node); |
352 query_stack.push_back(quotes_node); | 352 query_stack.push_back(quotes_node); |
353 in_quotes = true; | 353 in_quotes = true; |
354 } else { | 354 } else { |
355 query_stack.pop_back(); // Stop adding to the quoted phrase. | 355 query_stack.pop_back(); // Stop adding to the quoted phrase. |
356 in_quotes = false; | 356 in_quotes = false; |
357 } | 357 } |
358 } | 358 } |
359 } | 359 } |
360 } | 360 } |
361 | 361 |
362 root->RemoveEmptySubnodes(); | 362 root->RemoveEmptySubnodes(); |
363 return true; | 363 return true; |
364 } | 364 } |
365 | 365 |
366 void QueryParser::ExtractQueryWords(const string16& text, | 366 void QueryParser::ExtractQueryWords(const string16& text, |
367 std::vector<QueryWord>* words) { | 367 std::vector<QueryWord>* words) { |
368 WordIterator iter(&text, WordIterator::BREAK_WORD); | 368 base::BreakIterator iter(&text, base::BreakIterator::BREAK_WORD); |
369 // TODO(evanm): support a locale here | 369 // TODO(evanm): support a locale here |
370 if (!iter.Init()) | 370 if (!iter.Init()) |
371 return; | 371 return; |
372 | 372 |
373 while (iter.Advance()) { | 373 while (iter.Advance()) { |
374 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It | 374 // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It |
375 // is not necessarily a word, but could also be a sequence of punctuation | 375 // is not necessarily a word, but could also be a sequence of punctuation |
376 // or whitespace. | 376 // or whitespace. |
377 if (iter.IsWord()) { | 377 if (iter.IsWord()) { |
378 string16 word = iter.GetWord(); | 378 string16 word = iter.GetString(); |
379 if (!word.empty()) { | 379 if (!word.empty()) { |
380 words->push_back(QueryWord()); | 380 words->push_back(QueryWord()); |
381 words->back().word = word; | 381 words->back().word = word; |
382 words->back().position = iter.prev(); | 382 words->back().position = iter.prev(); |
383 } | 383 } |
384 } | 384 } |
385 } | 385 } |
386 } | 386 } |
OLD | NEW |