Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(148)

Side by Side Diff: components/search_engines/template_url.cc

Issue 1088523002: Fixed ExtractSearchTermsFromURL for search engines with encoding != "UTF-8". (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Escaping is used instead of writing non-latin characters in unittests Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | components/search_engines/template_url_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/search_engines/template_url.h" 5 #include "components/search_engines/template_url.h"
6 6
7 #include <string> 7 #include <string>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/basictypes.h" 10 #include "base/basictypes.h"
(...skipping 441 matching lines...) Expand 10 before | Expand all | Expand 10 after
452 const SearchTermsData& search_terms_data) const { 452 const SearchTermsData& search_terms_data) const {
453 ParseIfNecessary(search_terms_data); 453 ParseIfNecessary(search_terms_data);
454 return search_term_key_location_; 454 return search_term_key_location_;
455 } 455 }
456 456
457 base::string16 TemplateURLRef::SearchTermToString16( 457 base::string16 TemplateURLRef::SearchTermToString16(
458 const std::string& term) const { 458 const std::string& term) const {
459 const std::vector<std::string>& encodings = owner_->input_encodings(); 459 const std::vector<std::string>& encodings = owner_->input_encodings();
460 base::string16 result; 460 base::string16 result;
461 461
462 std::string unescaped = net::UnescapeURLComponent( 462 net::UnescapeRule::Type unescape_rules =
463 term, 463 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS;
464 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE | 464 if (search_term_key_location_ != url::Parsed::PATH)
465 net::UnescapeRule::URL_SPECIAL_CHARS); 465 unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
466
467 std::string unescaped = net::UnescapeURLComponent(term, unescape_rules);
466 for (size_t i = 0; i < encodings.size(); ++i) { 468 for (size_t i = 0; i < encodings.size(); ++i) {
467 if (base::CodepageToUTF16(unescaped, encodings[i].c_str(), 469 if (base::CodepageToUTF16(unescaped, encodings[i].c_str(),
468 base::OnStringConversionError::FAIL, &result)) 470 base::OnStringConversionError::FAIL, &result))
469 return result; 471 return result;
470 } 472 }
471 473
472 // Always fall back on UTF-8 if it works. 474 // Always fall back on UTF-8 if it works.
473 if (base::CodepageToUTF16(unescaped, base::kCodepageUTF8, 475 if (base::CodepageToUTF16(unescaped, base::kCodepageUTF8,
474 base::OnStringConversionError::FAIL, &result)) 476 base::OnStringConversionError::FAIL, &result))
475 return result; 477 return result;
476 478
477 // When nothing worked, just use the escaped text. We have no idea what the 479 // When nothing worked, just use the escaped text. We have no idea what the
478 // encoding is. We need to substitute spaces for pluses ourselves since we're 480 // encoding is. We need to substitute spaces for pluses ourselves since we're
479 // not sending it through an unescaper. 481 // not sending it through an unescaper.
480 result = base::UTF8ToUTF16(term); 482 result = base::UTF8ToUTF16(term);
481 std::replace(result.begin(), result.end(), '+', ' '); 483 if (unescape_rules & net::UnescapeRule::REPLACE_PLUS_WITH_SPACE)
484 std::replace(result.begin(), result.end(), '+', ' ');
482 return result; 485 return result;
483 } 486 }
484 487
485 bool TemplateURLRef::HasGoogleBaseURLs( 488 bool TemplateURLRef::HasGoogleBaseURLs(
486 const SearchTermsData& search_terms_data) const { 489 const SearchTermsData& search_terms_data) const {
487 ParseIfNecessary(search_terms_data); 490 ParseIfNecessary(search_terms_data);
488 for (size_t i = 0; i < replacements_.size(); ++i) { 491 for (size_t i = 0; i < replacements_.size(); ++i) {
489 if ((replacements_[i].type == GOOGLE_BASE_URL) || 492 if ((replacements_[i].type == GOOGLE_BASE_URL) ||
490 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) 493 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
491 return true; 494 return true;
(...skipping 25 matching lines...) Expand all
517 // Host, path and port must match. 520 // Host, path and port must match.
518 if ((url.port() != pattern.port()) || 521 if ((url.port() != pattern.port()) ||
519 (url.host() != host_) || 522 (url.host() != host_) ||
520 ((url.path() != path_) && 523 ((url.path() != path_) &&
521 (search_term_key_location_ != url::Parsed::PATH))) { 524 (search_term_key_location_ != url::Parsed::PATH))) {
522 return false; 525 return false;
523 } 526 }
524 527
525 std::string source; 528 std::string source;
526 url::Component position; 529 url::Component position;
527 net::UnescapeRule::Type unescape_rules =
528 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS;
529 530
530 if (search_term_key_location_ == url::Parsed::PATH) { 531 if (search_term_key_location_ == url::Parsed::PATH) {
531 source = url.path(); 532 source = url.path();
532 533
533 // Characters in the path before and after search terms must match. 534 // Characters in the path before and after search terms must match.
534 if (source.length() < path_.length()) 535 if (source.length() < path_.length())
535 return false; 536 return false;
536 position.begin = search_term_position_in_path_; 537 position.begin = search_term_position_in_path_;
537 position.len = source.length() - path_.length(); 538 position.len = source.length() - path_.length();
538 if (source.substr(0, position.begin) + source.substr(position.end()) != 539 if (source.substr(0, position.begin) + source.substr(position.end()) !=
(...skipping 14 matching lines...) Expand all
553 // Fail if search term key is found twice. 554 // Fail if search term key is found twice.
554 if (key_found) 555 if (key_found)
555 return false; 556 return false;
556 key_found = true; 557 key_found = true;
557 position = value; 558 position = value;
558 } 559 }
559 } 560 }
560 } 561 }
561 if (!key_found) 562 if (!key_found)
562 return false; 563 return false;
563 unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
564 } 564 }
565 565
566 // Extract the search term. 566 // Extract the search term.
567 *search_terms = net::UnescapeAndDecodeUTF8URLComponent( 567 *search_terms = SearchTermToString16(
568 source.substr(position.begin, position.len), unescape_rules); 568 source.substr(position.begin, position.len));
569 if (search_terms_component) 569 if (search_terms_component)
570 *search_terms_component = search_term_key_location_; 570 *search_terms_component = search_term_key_location_;
571 if (search_terms_position) 571 if (search_terms_position)
572 *search_terms_position = position; 572 *search_terms_position = position;
573 return true; 573 return true;
574 } 574 }
575 575
576 void TemplateURLRef::InvalidateCachedValues() const { 576 void TemplateURLRef::InvalidateCachedValues() const {
577 supports_replacements_ = valid_ = parsed_ = false; 577 supports_replacements_ = valid_ = parsed_ = false;
578 host_.clear(); 578 host_.clear();
(...skipping 964 matching lines...) Expand 10 before | Expand all | Expand 10 after
1543 // patterns. This means that given patterns 1543 // patterns. This means that given patterns
1544 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], 1544 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
1545 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would 1545 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would
1546 // return false. This is important for at least Google, where such URLs 1546 // return false. This is important for at least Google, where such URLs
1547 // are invalid. 1547 // are invalid.
1548 return !search_terms->empty(); 1548 return !search_terms->empty();
1549 } 1549 }
1550 } 1550 }
1551 return false; 1551 return false;
1552 } 1552 }
OLD | NEW
« no previous file with comments | « no previous file | components/search_engines/template_url_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698