OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/search_engines/template_url.h" | 5 #include "chrome/browser/search_engines/template_url.h" |
6 | 6 |
7 #include "base/format_macros.h" | 7 #include "base/format_macros.h" |
8 #include "base/guid.h" | 8 #include "base/guid.h" |
9 #include "base/i18n/case_conversion.h" | 9 #include "base/i18n/case_conversion.h" |
10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
230 for (Replacements::iterator i = replacements_.begin(); | 230 for (Replacements::iterator i = replacements_.begin(); |
231 i != replacements_.end(); ++i) { | 231 i != replacements_.end(); ++i) { |
232 if (i->type == SEARCH_TERMS) { | 232 if (i->type == SEARCH_TERMS) { |
233 string16::size_type query_start = parsed_url_.find('?'); | 233 string16::size_type query_start = parsed_url_.find('?'); |
234 is_in_query = query_start != string16::npos && | 234 is_in_query = query_start != string16::npos && |
235 (static_cast<string16::size_type>(i->index) > query_start); | 235 (static_cast<string16::size_type>(i->index) > query_start); |
236 break; | 236 break; |
237 } | 237 } |
238 } | 238 } |
239 | 239 |
240 std::string input_encoding; | |
240 string16 encoded_terms; | 241 string16 encoded_terms; |
241 string16 encoded_original_query; | 242 string16 encoded_original_query; |
242 std::string input_encoding; | 243 owner_->EncodeSearchTerms(search_terms_args, is_in_query, &input_encoding, |
243 // Encode the search terms so that we know the encoding. | 244 &encoded_terms, &encoded_original_query); |
244 for (std::vector<std::string>::const_iterator i( | |
245 owner_->input_encodings().begin()); | |
246 i != owner_->input_encodings().end(); ++i) { | |
247 if (TryEncoding(search_terms_args.search_terms, | |
248 search_terms_args.original_query, i->c_str(), | |
249 is_in_query, &encoded_terms, &encoded_original_query)) { | |
250 input_encoding = *i; | |
251 break; | |
252 } | |
253 } | |
254 if (input_encoding.empty()) { | |
255 input_encoding = "UTF-8"; | |
256 if (!TryEncoding(search_terms_args.search_terms, | |
257 search_terms_args.original_query, | |
258 input_encoding.c_str(), is_in_query, &encoded_terms, | |
259 &encoded_original_query)) | |
260 NOTREACHED(); | |
261 } | |
262 | 245 |
263 std::string url = parsed_url_; | 246 std::string url = parsed_url_; |
264 | 247 |
265 // replacements_ is ordered in ascending order, as such we need to iterate | 248 // replacements_ is ordered in ascending order, as such we need to iterate |
266 // from the back. | 249 // from the back. |
267 for (Replacements::reverse_iterator i = replacements_.rbegin(); | 250 for (Replacements::reverse_iterator i = replacements_.rbegin(); |
268 i != replacements_.rend(); ++i) { | 251 i != replacements_.rend(); ++i) { |
269 switch (i->type) { | 252 switch (i->type) { |
270 case ENCODING: | 253 case ENCODING: |
271 url.insert(i->index, input_encoding); | 254 url.insert(i->index, input_encoding); |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
465 if ((replacements_[i].type == GOOGLE_BASE_URL) || | 448 if ((replacements_[i].type == GOOGLE_BASE_URL) || |
466 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) | 449 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) |
467 return true; | 450 return true; |
468 } | 451 } |
469 return false; | 452 return false; |
470 } | 453 } |
471 | 454 |
472 bool TemplateURLRef::ExtractSearchTermsFromURL( | 455 bool TemplateURLRef::ExtractSearchTermsFromURL( |
473 const GURL& url, | 456 const GURL& url, |
474 string16* search_terms, | 457 string16* search_terms, |
475 const SearchTermsData& search_terms_data) const { | 458 const SearchTermsData& search_terms_data, |
459 url_parse::Parsed::ComponentType* search_terms_location, | |
460 url_parse::Component* search_terms_position) const { | |
476 DCHECK(search_terms); | 461 DCHECK(search_terms); |
477 search_terms->clear(); | 462 search_terms->clear(); |
478 | 463 |
479 ParseIfNecessaryUsingTermsData(search_terms_data); | 464 ParseIfNecessaryUsingTermsData(search_terms_data); |
480 | 465 |
481 // We need a search term in the template URL to extract something. | 466 // We need a search term in the template URL to extract something. |
482 if (search_term_key_.empty()) | 467 if (search_term_key_.empty()) |
483 return false; | 468 return false; |
484 | 469 |
485 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/ | 470 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/ |
(...skipping 21 matching lines...) Expand all Loading... | |
507 &value)) { | 492 &value)) { |
508 if (key.is_nonempty()) { | 493 if (key.is_nonempty()) { |
509 if (params.substr(key.begin, key.len) == search_term_key_) { | 494 if (params.substr(key.begin, key.len) == search_term_key_) { |
510 // Extract the search term. | 495 // Extract the search term. |
511 *search_terms = net::UnescapeAndDecodeUTF8URLComponent( | 496 *search_terms = net::UnescapeAndDecodeUTF8URLComponent( |
512 params.substr(value.begin, value.len), | 497 params.substr(value.begin, value.len), |
513 net::UnescapeRule::SPACES | | 498 net::UnescapeRule::SPACES | |
514 net::UnescapeRule::URL_SPECIAL_CHARS | | 499 net::UnescapeRule::URL_SPECIAL_CHARS | |
515 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE, | 500 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE, |
516 NULL); | 501 NULL); |
502 if (search_terms_location) | |
503 *search_terms_location = search_term_key_location_; | |
504 if (search_terms_position) | |
505 *search_terms_position = value; | |
517 return true; | 506 return true; |
518 } | 507 } |
519 } | 508 } |
520 } | 509 } |
521 return false; | 510 return false; |
522 } | 511 } |
523 | 512 |
524 void TemplateURLRef::InvalidateCachedValues() const { | 513 void TemplateURLRef::InvalidateCachedValues() const { |
525 supports_replacements_ = valid_ = parsed_ = false; | 514 supports_replacements_ = valid_ = parsed_ = false; |
526 host_.clear(); | 515 host_.clear(); |
(...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
837 string16* search_terms) { | 826 string16* search_terms) { |
838 UIThreadSearchTermsData search_terms_data(profile_); | 827 UIThreadSearchTermsData search_terms_data(profile_); |
839 return ExtractSearchTermsFromURLUsingTermsData(url, search_terms, | 828 return ExtractSearchTermsFromURLUsingTermsData(url, search_terms, |
840 search_terms_data); | 829 search_terms_data); |
841 } | 830 } |
842 | 831 |
843 bool TemplateURL::ExtractSearchTermsFromURLUsingTermsData( | 832 bool TemplateURL::ExtractSearchTermsFromURLUsingTermsData( |
844 const GURL& url, | 833 const GURL& url, |
845 string16* search_terms, | 834 string16* search_terms, |
846 const SearchTermsData& search_terms_data) { | 835 const SearchTermsData& search_terms_data) { |
847 DCHECK(search_terms); | 836 return FindSearchTermsInURL(url, search_terms_data, search_terms, NULL, NULL); |
848 search_terms->clear(); | 837 } |
849 | 838 |
850 // Then try to match with every pattern. | |
851 for (size_t i = 0; i < URLCount(); ++i) { | |
852 TemplateURLRef ref(this, i); | |
853 if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data)) { | |
854 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty | |
855 // it means the pattern matched but no search terms were present. In this | |
856 // case we fail immediately without looking for matches in subsequent | |
857 // patterns. This means that given patterns | |
858 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], | |
859 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would | |
860 // return false. This is important for at least Google, where such URLs | |
861 // are invalid. | |
862 return !search_terms->empty(); | |
863 } | |
864 } | |
865 return false; | |
866 } | |
867 | 839 |
868 bool TemplateURL::IsSearchURL(const GURL& url) { | 840 bool TemplateURL::IsSearchURL(const GURL& url) { |
869 UIThreadSearchTermsData search_terms_data(profile_); | 841 UIThreadSearchTermsData search_terms_data(profile_); |
870 return IsSearchURLUsingTermsData(url, search_terms_data); | 842 return IsSearchURLUsingTermsData(url, search_terms_data); |
871 } | 843 } |
872 | 844 |
873 bool TemplateURL::IsSearchURLUsingTermsData( | 845 bool TemplateURL::IsSearchURLUsingTermsData( |
874 const GURL& url, | 846 const GURL& url, |
875 const SearchTermsData& search_terms_data) { | 847 const SearchTermsData& search_terms_data) { |
876 string16 search_terms; | 848 string16 search_terms; |
(...skipping 13 matching lines...) Expand all Loading... | |
890 if (key.is_nonempty() && | 862 if (key.is_nonempty() && |
891 params[i].substr(key.begin, key.len) == | 863 params[i].substr(key.begin, key.len) == |
892 search_terms_replacement_key()) { | 864 search_terms_replacement_key()) { |
893 return true; | 865 return true; |
894 } | 866 } |
895 } | 867 } |
896 } | 868 } |
897 return false; | 869 return false; |
898 } | 870 } |
899 | 871 |
872 bool TemplateURL::ReplaceSearchTermsInURL( | |
873 const GURL& url, | |
874 const TemplateURLRef::SearchTermsArgs& search_terms_args, | |
875 GURL* result) { | |
876 UIThreadSearchTermsData search_terms_data(profile_); | |
877 // TODO(beaudoin): Use AQS from |search_terms_args| too. | |
878 url_parse::Parsed::ComponentType search_term_location; | |
879 url_parse::Component search_terms_position; | |
880 string16 search_terms; | |
881 if (!FindSearchTermsInURL(url, search_terms_data, &search_terms, | |
882 &search_term_location, &search_terms_position)) | |
Peter Kasting
2013/03/22 20:20:06
Nit: Lines of args should all be aligned. Indent
beaudoin
2013/03/22 23:10:43
Done.
| |
883 return false; | |
884 DCHECK(search_terms_position.is_nonempty()); | |
885 | |
886 // FindSearchTermsInURL only returns true for search terms in the query or | |
887 // ref, so we can call EncodeSearchTerms with |is_in_query| = true, since | |
888 // query and ref are encoded in the same way. | |
889 std::string input_encoding; | |
890 string16 encoded_terms; | |
891 string16 encoded_original_query; | |
892 EncodeSearchTerms(search_terms_args, true, &input_encoding, | |
893 &encoded_terms, &encoded_original_query); | |
894 | |
895 url_canon::StdStringReplacements<std::string> replacements; | |
896 std::string new_params; | |
897 switch (search_term_location) { | |
Peter Kasting
2013/03/22 20:20:06
This switch can be eliminated, and the function sh
beaudoin
2013/03/22 23:10:43
Done.
| |
898 case url_parse::Parsed::QUERY: | |
899 new_params += url.query().substr(0, search_terms_position.begin); | |
900 new_params += UTF16ToUTF8(search_terms_args.search_terms); | |
901 new_params.append(url.query().substr(search_terms_position.begin + | |
902 search_terms_position.len)); | |
903 replacements.SetQueryStr(new_params); | |
904 break; | |
905 | |
906 case url_parse::Parsed::REF: | |
907 new_params = url.ref().substr(0, search_terms_position.begin); | |
908 new_params += UTF16ToUTF8(search_terms_args.search_terms); | |
909 new_params += url.ref().substr(search_terms_position.begin + | |
910 search_terms_position.len); | |
911 replacements.SetRefStr(new_params); | |
912 break; | |
913 | |
914 default: | |
915 NOTREACHED(); | |
916 } | |
917 | |
918 *result = url.ReplaceComponents(replacements); | |
919 return true; | |
920 } | |
921 | |
922 void TemplateURL::EncodeSearchTerms( | |
923 const TemplateURLRef::SearchTermsArgs& search_terms_args, | |
924 bool is_in_query, | |
925 std::string* input_encoding, | |
926 string16* encoded_terms, | |
927 string16* encoded_original_query) const { | |
928 for (std::vector<std::string>::const_iterator i(input_encodings().begin()); | |
929 i != input_encodings().end(); ++i) { | |
930 if (TryEncoding(search_terms_args.search_terms, | |
931 search_terms_args.original_query, i->c_str(), | |
932 is_in_query, encoded_terms, encoded_original_query)) { | |
933 *input_encoding = *i; | |
934 break; | |
935 } | |
936 } | |
937 if (input_encoding->empty()) { | |
Peter Kasting
2013/03/22 20:20:06
This last block can be avoided by rewriting the fu
beaudoin
2013/03/22 23:10:43
Done.
| |
938 *input_encoding = "UTF-8"; | |
939 if (!TryEncoding(search_terms_args.search_terms, | |
940 search_terms_args.original_query, | |
941 input_encoding->c_str(), is_in_query, encoded_terms, | |
942 encoded_original_query)) | |
943 NOTREACHED(); | |
944 } | |
945 } | |
946 | |
900 void TemplateURL::CopyFrom(const TemplateURL& other) { | 947 void TemplateURL::CopyFrom(const TemplateURL& other) { |
901 if (this == &other) | 948 if (this == &other) |
902 return; | 949 return; |
903 | 950 |
904 profile_ = other.profile_; | 951 profile_ = other.profile_; |
905 data_ = other.data_; | 952 data_ = other.data_; |
906 url_ref_.InvalidateCachedValues(); | 953 url_ref_.InvalidateCachedValues(); |
907 suggestions_url_ref_.InvalidateCachedValues(); | 954 suggestions_url_ref_.InvalidateCachedValues(); |
908 instant_url_ref_.InvalidateCachedValues(); | 955 instant_url_ref_.InvalidateCachedValues(); |
909 SetPrepopulateId(other.data_.prepopulate_id); | 956 SetPrepopulateId(other.data_.prepopulate_id); |
(...skipping 13 matching lines...) Expand all Loading... | |
923 } | 970 } |
924 | 971 |
925 void TemplateURL::ResetKeywordIfNecessary(bool force) { | 972 void TemplateURL::ResetKeywordIfNecessary(bool force) { |
926 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { | 973 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { |
927 DCHECK(!IsExtensionKeyword()); | 974 DCHECK(!IsExtensionKeyword()); |
928 GURL url(TemplateURLService::GenerateSearchURL(this)); | 975 GURL url(TemplateURLService::GenerateSearchURL(this)); |
929 if (url.is_valid()) | 976 if (url.is_valid()) |
930 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); | 977 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); |
931 } | 978 } |
932 } | 979 } |
980 | |
981 bool TemplateURL::FindSearchTermsInURL( | |
982 const GURL& url, | |
983 const SearchTermsData& search_terms_data, | |
984 string16* search_terms, | |
985 url_parse::Parsed::ComponentType* search_term_location, | |
986 url_parse::Component* search_terms_position) { | |
987 DCHECK(search_terms); | |
988 search_terms->clear(); | |
989 | |
990 // Try to match with every pattern. | |
991 for (size_t i = 0; i < URLCount(); ++i) { | |
992 TemplateURLRef ref(this, i); | |
993 if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data, | |
994 search_term_location, search_terms_position)) { | |
995 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty | |
996 // it means the pattern matched but no search terms were present. In this | |
997 // case we fail immediately without looking for matches in subsequent | |
998 // patterns. This means that given patterns | |
999 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], | |
1000 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would | |
1001 // return false. This is important for at least Google, where such URLs | |
1002 // are invalid. | |
1003 return !search_terms->empty(); | |
1004 } | |
1005 } | |
1006 return false; | |
1007 } | |
OLD | NEW |