OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/search_engines/template_url.h" | 5 #include "chrome/browser/search_engines/template_url.h" |
6 | 6 |
7 #include "base/format_macros.h" | 7 #include "base/format_macros.h" |
8 #include "base/guid.h" | 8 #include "base/guid.h" |
9 #include "base/i18n/case_conversion.h" | 9 #include "base/i18n/case_conversion.h" |
10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
(...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
231 for (Replacements::iterator i = replacements_.begin(); | 231 for (Replacements::iterator i = replacements_.begin(); |
232 i != replacements_.end(); ++i) { | 232 i != replacements_.end(); ++i) { |
233 if (i->type == SEARCH_TERMS) { | 233 if (i->type == SEARCH_TERMS) { |
234 string16::size_type query_start = parsed_url_.find('?'); | 234 string16::size_type query_start = parsed_url_.find('?'); |
235 is_in_query = query_start != string16::npos && | 235 is_in_query = query_start != string16::npos && |
236 (static_cast<string16::size_type>(i->index) > query_start); | 236 (static_cast<string16::size_type>(i->index) > query_start); |
237 break; | 237 break; |
238 } | 238 } |
239 } | 239 } |
240 | 240 |
| 241 std::string input_encoding; |
241 string16 encoded_terms; | 242 string16 encoded_terms; |
242 string16 encoded_original_query; | 243 string16 encoded_original_query; |
243 std::string input_encoding; | 244 owner_->EncodeSearchTerms(search_terms_args, is_in_query, &input_encoding, |
244 // Encode the search terms so that we know the encoding. | 245 &encoded_terms, &encoded_original_query); |
245 for (std::vector<std::string>::const_iterator i( | |
246 owner_->input_encodings().begin()); | |
247 i != owner_->input_encodings().end(); ++i) { | |
248 if (TryEncoding(search_terms_args.search_terms, | |
249 search_terms_args.original_query, i->c_str(), | |
250 is_in_query, &encoded_terms, &encoded_original_query)) { | |
251 input_encoding = *i; | |
252 break; | |
253 } | |
254 } | |
255 if (input_encoding.empty()) { | |
256 input_encoding = "UTF-8"; | |
257 if (!TryEncoding(search_terms_args.search_terms, | |
258 search_terms_args.original_query, | |
259 input_encoding.c_str(), is_in_query, &encoded_terms, | |
260 &encoded_original_query)) | |
261 NOTREACHED(); | |
262 } | |
263 | 246 |
264 std::string url = parsed_url_; | 247 std::string url = parsed_url_; |
265 | 248 |
266 // replacements_ is ordered in ascending order, as such we need to iterate | 249 // replacements_ is ordered in ascending order, as such we need to iterate |
267 // from the back. | 250 // from the back. |
268 for (Replacements::reverse_iterator i = replacements_.rbegin(); | 251 for (Replacements::reverse_iterator i = replacements_.rbegin(); |
269 i != replacements_.rend(); ++i) { | 252 i != replacements_.rend(); ++i) { |
270 switch (i->type) { | 253 switch (i->type) { |
271 case ENCODING: | 254 case ENCODING: |
272 url.insert(i->index, input_encoding); | 255 url.insert(i->index, input_encoding); |
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
470 if ((replacements_[i].type == GOOGLE_BASE_URL) || | 453 if ((replacements_[i].type == GOOGLE_BASE_URL) || |
471 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) | 454 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) |
472 return true; | 455 return true; |
473 } | 456 } |
474 return false; | 457 return false; |
475 } | 458 } |
476 | 459 |
477 bool TemplateURLRef::ExtractSearchTermsFromURL( | 460 bool TemplateURLRef::ExtractSearchTermsFromURL( |
478 const GURL& url, | 461 const GURL& url, |
479 string16* search_terms, | 462 string16* search_terms, |
480 const SearchTermsData& search_terms_data) const { | 463 const SearchTermsData& search_terms_data, |
| 464 url_parse::Parsed::ComponentType* search_terms_component, |
| 465 url_parse::Component* search_terms_position) const { |
481 DCHECK(search_terms); | 466 DCHECK(search_terms); |
482 search_terms->clear(); | 467 search_terms->clear(); |
483 | 468 |
484 ParseIfNecessaryUsingTermsData(search_terms_data); | 469 ParseIfNecessaryUsingTermsData(search_terms_data); |
485 | 470 |
486 // We need a search term in the template URL to extract something. | 471 // We need a search term in the template URL to extract something. |
487 if (search_term_key_.empty()) | 472 if (search_term_key_.empty()) |
488 return false; | 473 return false; |
489 | 474 |
490 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/ | 475 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/ |
(...skipping 21 matching lines...) Expand all Loading... |
512 &value)) { | 497 &value)) { |
513 if (key.is_nonempty()) { | 498 if (key.is_nonempty()) { |
514 if (params.substr(key.begin, key.len) == search_term_key_) { | 499 if (params.substr(key.begin, key.len) == search_term_key_) { |
515 // Extract the search term. | 500 // Extract the search term. |
516 *search_terms = net::UnescapeAndDecodeUTF8URLComponent( | 501 *search_terms = net::UnescapeAndDecodeUTF8URLComponent( |
517 params.substr(value.begin, value.len), | 502 params.substr(value.begin, value.len), |
518 net::UnescapeRule::SPACES | | 503 net::UnescapeRule::SPACES | |
519 net::UnescapeRule::URL_SPECIAL_CHARS | | 504 net::UnescapeRule::URL_SPECIAL_CHARS | |
520 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE, | 505 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE, |
521 NULL); | 506 NULL); |
| 507 if (search_terms_component) |
| 508 *search_terms_component = search_term_key_location_; |
| 509 if (search_terms_position) |
| 510 *search_terms_position = value; |
522 return true; | 511 return true; |
523 } | 512 } |
524 } | 513 } |
525 } | 514 } |
526 return false; | 515 return false; |
527 } | 516 } |
528 | 517 |
529 void TemplateURLRef::InvalidateCachedValues() const { | 518 void TemplateURLRef::InvalidateCachedValues() const { |
530 supports_replacements_ = valid_ = parsed_ = false; | 519 supports_replacements_ = valid_ = parsed_ = false; |
531 host_.clear(); | 520 host_.clear(); |
(...skipping 312 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
844 string16* search_terms) { | 833 string16* search_terms) { |
845 UIThreadSearchTermsData search_terms_data(profile_); | 834 UIThreadSearchTermsData search_terms_data(profile_); |
846 return ExtractSearchTermsFromURLUsingTermsData(url, search_terms, | 835 return ExtractSearchTermsFromURLUsingTermsData(url, search_terms, |
847 search_terms_data); | 836 search_terms_data); |
848 } | 837 } |
849 | 838 |
850 bool TemplateURL::ExtractSearchTermsFromURLUsingTermsData( | 839 bool TemplateURL::ExtractSearchTermsFromURLUsingTermsData( |
851 const GURL& url, | 840 const GURL& url, |
852 string16* search_terms, | 841 string16* search_terms, |
853 const SearchTermsData& search_terms_data) { | 842 const SearchTermsData& search_terms_data) { |
854 DCHECK(search_terms); | 843 return FindSearchTermsInURL(url, search_terms_data, search_terms, NULL, NULL); |
855 search_terms->clear(); | 844 } |
856 | 845 |
857 // Then try to match with every pattern. | |
858 for (size_t i = 0; i < URLCount(); ++i) { | |
859 TemplateURLRef ref(this, i); | |
860 if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data)) { | |
861 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty | |
862 // it means the pattern matched but no search terms were present. In this | |
863 // case we fail immediately without looking for matches in subsequent | |
864 // patterns. This means that given patterns | |
865 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], | |
866 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would | |
867 // return false. This is important for at least Google, where such URLs | |
868 // are invalid. | |
869 return !search_terms->empty(); | |
870 } | |
871 } | |
872 return false; | |
873 } | |
874 | 846 |
875 bool TemplateURL::IsSearchURL(const GURL& url) { | 847 bool TemplateURL::IsSearchURL(const GURL& url) { |
876 UIThreadSearchTermsData search_terms_data(profile_); | 848 UIThreadSearchTermsData search_terms_data(profile_); |
877 return IsSearchURLUsingTermsData(url, search_terms_data); | 849 return IsSearchURLUsingTermsData(url, search_terms_data); |
878 } | 850 } |
879 | 851 |
880 bool TemplateURL::IsSearchURLUsingTermsData( | 852 bool TemplateURL::IsSearchURLUsingTermsData( |
881 const GURL& url, | 853 const GURL& url, |
882 const SearchTermsData& search_terms_data) { | 854 const SearchTermsData& search_terms_data) { |
883 string16 search_terms; | 855 string16 search_terms; |
(...skipping 13 matching lines...) Expand all Loading... |
897 if (key.is_nonempty() && | 869 if (key.is_nonempty() && |
898 params[i].substr(key.begin, key.len) == | 870 params[i].substr(key.begin, key.len) == |
899 search_terms_replacement_key()) { | 871 search_terms_replacement_key()) { |
900 return true; | 872 return true; |
901 } | 873 } |
902 } | 874 } |
903 } | 875 } |
904 return false; | 876 return false; |
905 } | 877 } |
906 | 878 |
| 879 bool TemplateURL::ReplaceSearchTermsInURL( |
| 880 const GURL& url, |
| 881 const TemplateURLRef::SearchTermsArgs& search_terms_args, |
| 882 GURL* result) { |
| 883 UIThreadSearchTermsData search_terms_data(profile_); |
| 884 // TODO(beaudoin): Use AQS from |search_terms_args| too. |
| 885 url_parse::Parsed::ComponentType search_term_component; |
| 886 url_parse::Component search_terms_position; |
| 887 string16 search_terms; |
| 888 if (!FindSearchTermsInURL(url, search_terms_data, &search_terms, |
| 889 &search_term_component, &search_terms_position)) { |
| 890 return false; |
| 891 } |
| 892 DCHECK(search_terms_position.is_nonempty()); |
| 893 |
| 894 // FindSearchTermsInURL only returns true for search terms in the query or |
| 895 // ref, so we can call EncodeSearchTerm with |is_in_query| = true, since query |
| 896 // and ref are encoded in the same way. |
| 897 std::string input_encoding; |
| 898 string16 encoded_terms; |
| 899 string16 encoded_original_query; |
| 900 EncodeSearchTerms(search_terms_args, true, &input_encoding, |
| 901 &encoded_terms, &encoded_original_query); |
| 902 |
| 903 std::string old_params((search_term_component == url_parse::Parsed::REF) ? |
| 904 url.ref() : url.query()); |
| 905 std::string new_params(old_params, 0, search_terms_position.begin); |
| 906 new_params += UTF16ToUTF8(search_terms_args.search_terms); |
| 907 new_params += old_params.substr(search_terms_position.end()); |
| 908 url_canon::StdStringReplacements<std::string> replacements; |
| 909 if (search_term_component == url_parse::Parsed::REF) |
| 910 replacements.SetRefStr(new_params); |
| 911 else |
| 912 replacements.SetQueryStr(new_params); |
| 913 *result = url.ReplaceComponents(replacements); |
| 914 return true; |
| 915 } |
| 916 |
| 917 void TemplateURL::EncodeSearchTerms( |
| 918 const TemplateURLRef::SearchTermsArgs& search_terms_args, |
| 919 bool is_in_query, |
| 920 std::string* input_encoding, |
| 921 string16* encoded_terms, |
| 922 string16* encoded_original_query) const { |
| 923 |
| 924 std::vector<std::string> encodings(input_encodings()); |
| 925 if (std::find(encodings.begin(), encodings.end(), "UTF-8") == encodings.end()) |
| 926 encodings.push_back("UTF-8"); |
| 927 for (std::vector<std::string>::const_iterator i(encodings.begin()); |
| 928 i != encodings.end(); ++i) { |
| 929 if (TryEncoding(search_terms_args.search_terms, |
| 930 search_terms_args.original_query, i->c_str(), |
| 931 is_in_query, encoded_terms, encoded_original_query)) { |
| 932 *input_encoding = *i; |
| 933 return; |
| 934 } |
| 935 } |
| 936 NOTREACHED(); |
| 937 } |
| 938 |
907 void TemplateURL::CopyFrom(const TemplateURL& other) { | 939 void TemplateURL::CopyFrom(const TemplateURL& other) { |
908 if (this == &other) | 940 if (this == &other) |
909 return; | 941 return; |
910 | 942 |
911 profile_ = other.profile_; | 943 profile_ = other.profile_; |
912 data_ = other.data_; | 944 data_ = other.data_; |
913 url_ref_.InvalidateCachedValues(); | 945 url_ref_.InvalidateCachedValues(); |
914 suggestions_url_ref_.InvalidateCachedValues(); | 946 suggestions_url_ref_.InvalidateCachedValues(); |
915 instant_url_ref_.InvalidateCachedValues(); | 947 instant_url_ref_.InvalidateCachedValues(); |
916 SetPrepopulateId(other.data_.prepopulate_id); | 948 SetPrepopulateId(other.data_.prepopulate_id); |
(...skipping 13 matching lines...) Expand all Loading... |
930 } | 962 } |
931 | 963 |
932 void TemplateURL::ResetKeywordIfNecessary(bool force) { | 964 void TemplateURL::ResetKeywordIfNecessary(bool force) { |
933 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { | 965 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { |
934 DCHECK(!IsExtensionKeyword()); | 966 DCHECK(!IsExtensionKeyword()); |
935 GURL url(TemplateURLService::GenerateSearchURL(this)); | 967 GURL url(TemplateURLService::GenerateSearchURL(this)); |
936 if (url.is_valid()) | 968 if (url.is_valid()) |
937 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); | 969 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); |
938 } | 970 } |
939 } | 971 } |
| 972 |
| 973 bool TemplateURL::FindSearchTermsInURL( |
| 974 const GURL& url, |
| 975 const SearchTermsData& search_terms_data, |
| 976 string16* search_terms, |
| 977 url_parse::Parsed::ComponentType* search_term_component, |
| 978 url_parse::Component* search_terms_position) { |
| 979 DCHECK(search_terms); |
| 980 search_terms->clear(); |
| 981 |
| 982 // Try to match with every pattern. |
| 983 for (size_t i = 0; i < URLCount(); ++i) { |
| 984 TemplateURLRef ref(this, i); |
| 985 if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data, |
| 986 search_term_component, search_terms_position)) { |
| 987 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty |
| 988 // it means the pattern matched but no search terms were present. In this |
| 989 // case we fail immediately without looking for matches in subsequent |
| 990 // patterns. This means that given patterns |
| 991 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], |
| 992 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would |
| 993 // return false. This is important for at least Google, where such URLs |
| 994 // are invalid. |
| 995 return !search_terms->empty(); |
| 996 } |
| 997 } |
| 998 return false; |
| 999 } |
OLD | NEW |