Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(41)

Side by Side Diff: chrome/browser/search_engines/template_url.cc

Issue 12623029: Upstreaming mechanism to add query refinement to omnibox searches. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Applied Peter's comments. Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/search_engines/template_url.h" 5 #include "chrome/browser/search_engines/template_url.h"
6 6
7 #include "base/format_macros.h" 7 #include "base/format_macros.h"
8 #include "base/guid.h" 8 #include "base/guid.h"
9 #include "base/i18n/case_conversion.h" 9 #include "base/i18n/case_conversion.h"
10 #include "base/i18n/icu_string_conversions.h" 10 #include "base/i18n/icu_string_conversions.h"
(...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after
231 for (Replacements::iterator i = replacements_.begin(); 231 for (Replacements::iterator i = replacements_.begin();
232 i != replacements_.end(); ++i) { 232 i != replacements_.end(); ++i) {
233 if (i->type == SEARCH_TERMS) { 233 if (i->type == SEARCH_TERMS) {
234 string16::size_type query_start = parsed_url_.find('?'); 234 string16::size_type query_start = parsed_url_.find('?');
235 is_in_query = query_start != string16::npos && 235 is_in_query = query_start != string16::npos &&
236 (static_cast<string16::size_type>(i->index) > query_start); 236 (static_cast<string16::size_type>(i->index) > query_start);
237 break; 237 break;
238 } 238 }
239 } 239 }
240 240
241 std::string input_encoding;
241 string16 encoded_terms; 242 string16 encoded_terms;
242 string16 encoded_original_query; 243 string16 encoded_original_query;
243 std::string input_encoding; 244 owner_->EncodeSearchTerms(search_terms_args, is_in_query, &input_encoding,
244 // Encode the search terms so that we know the encoding. 245 &encoded_terms, &encoded_original_query);
245 for (std::vector<std::string>::const_iterator i(
246 owner_->input_encodings().begin());
247 i != owner_->input_encodings().end(); ++i) {
248 if (TryEncoding(search_terms_args.search_terms,
249 search_terms_args.original_query, i->c_str(),
250 is_in_query, &encoded_terms, &encoded_original_query)) {
251 input_encoding = *i;
252 break;
253 }
254 }
255 if (input_encoding.empty()) {
256 input_encoding = "UTF-8";
257 if (!TryEncoding(search_terms_args.search_terms,
258 search_terms_args.original_query,
259 input_encoding.c_str(), is_in_query, &encoded_terms,
260 &encoded_original_query))
261 NOTREACHED();
262 }
263 246
264 std::string url = parsed_url_; 247 std::string url = parsed_url_;
265 248
266 // replacements_ is ordered in ascending order, as such we need to iterate 249 // replacements_ is ordered in ascending order, as such we need to iterate
267 // from the back. 250 // from the back.
268 for (Replacements::reverse_iterator i = replacements_.rbegin(); 251 for (Replacements::reverse_iterator i = replacements_.rbegin();
269 i != replacements_.rend(); ++i) { 252 i != replacements_.rend(); ++i) {
270 switch (i->type) { 253 switch (i->type) {
271 case ENCODING: 254 case ENCODING:
272 url.insert(i->index, input_encoding); 255 url.insert(i->index, input_encoding);
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after
470 if ((replacements_[i].type == GOOGLE_BASE_URL) || 453 if ((replacements_[i].type == GOOGLE_BASE_URL) ||
471 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) 454 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
472 return true; 455 return true;
473 } 456 }
474 return false; 457 return false;
475 } 458 }
476 459
477 bool TemplateURLRef::ExtractSearchTermsFromURL( 460 bool TemplateURLRef::ExtractSearchTermsFromURL(
478 const GURL& url, 461 const GURL& url,
479 string16* search_terms, 462 string16* search_terms,
480 const SearchTermsData& search_terms_data) const { 463 const SearchTermsData& search_terms_data,
464 url_parse::Parsed::ComponentType* search_terms_component,
465 url_parse::Component* search_terms_position) const {
481 DCHECK(search_terms); 466 DCHECK(search_terms);
482 search_terms->clear(); 467 search_terms->clear();
483 468
484 ParseIfNecessaryUsingTermsData(search_terms_data); 469 ParseIfNecessaryUsingTermsData(search_terms_data);
485 470
486 // We need a search term in the template URL to extract something. 471 // We need a search term in the template URL to extract something.
487 if (search_term_key_.empty()) 472 if (search_term_key_.empty())
488 return false; 473 return false;
489 474
490 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/ 475 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/
(...skipping 21 matching lines...) Expand all
512 &value)) { 497 &value)) {
513 if (key.is_nonempty()) { 498 if (key.is_nonempty()) {
514 if (params.substr(key.begin, key.len) == search_term_key_) { 499 if (params.substr(key.begin, key.len) == search_term_key_) {
515 // Extract the search term. 500 // Extract the search term.
516 *search_terms = net::UnescapeAndDecodeUTF8URLComponent( 501 *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
517 params.substr(value.begin, value.len), 502 params.substr(value.begin, value.len),
518 net::UnescapeRule::SPACES | 503 net::UnescapeRule::SPACES |
519 net::UnescapeRule::URL_SPECIAL_CHARS | 504 net::UnescapeRule::URL_SPECIAL_CHARS |
520 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE, 505 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
521 NULL); 506 NULL);
507 if (search_terms_component)
508 *search_terms_component = search_term_key_location_;
509 if (search_terms_position)
510 *search_terms_position = value;
522 return true; 511 return true;
523 } 512 }
524 } 513 }
525 } 514 }
526 return false; 515 return false;
527 } 516 }
528 517
529 void TemplateURLRef::InvalidateCachedValues() const { 518 void TemplateURLRef::InvalidateCachedValues() const {
530 supports_replacements_ = valid_ = parsed_ = false; 519 supports_replacements_ = valid_ = parsed_ = false;
531 host_.clear(); 520 host_.clear();
(...skipping 312 matching lines...) Expand 10 before | Expand all | Expand 10 after
844 string16* search_terms) { 833 string16* search_terms) {
845 UIThreadSearchTermsData search_terms_data(profile_); 834 UIThreadSearchTermsData search_terms_data(profile_);
846 return ExtractSearchTermsFromURLUsingTermsData(url, search_terms, 835 return ExtractSearchTermsFromURLUsingTermsData(url, search_terms,
847 search_terms_data); 836 search_terms_data);
848 } 837 }
849 838
850 bool TemplateURL::ExtractSearchTermsFromURLUsingTermsData( 839 bool TemplateURL::ExtractSearchTermsFromURLUsingTermsData(
851 const GURL& url, 840 const GURL& url,
852 string16* search_terms, 841 string16* search_terms,
853 const SearchTermsData& search_terms_data) { 842 const SearchTermsData& search_terms_data) {
854 DCHECK(search_terms); 843 return FindSearchTermsInURL(url, search_terms_data, search_terms, NULL, NULL);
855 search_terms->clear(); 844 }
856 845
857 // Then try to match with every pattern.
858 for (size_t i = 0; i < URLCount(); ++i) {
859 TemplateURLRef ref(this, i);
860 if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data)) {
861 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty
862 // it means the pattern matched but no search terms were present. In this
863 // case we fail immediately without looking for matches in subsequent
864 // patterns. This means that given patterns
865 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
866 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would
867 // return false. This is important for at least Google, where such URLs
868 // are invalid.
869 return !search_terms->empty();
870 }
871 }
872 return false;
873 }
874 846
875 bool TemplateURL::IsSearchURL(const GURL& url) { 847 bool TemplateURL::IsSearchURL(const GURL& url) {
876 UIThreadSearchTermsData search_terms_data(profile_); 848 UIThreadSearchTermsData search_terms_data(profile_);
877 return IsSearchURLUsingTermsData(url, search_terms_data); 849 return IsSearchURLUsingTermsData(url, search_terms_data);
878 } 850 }
879 851
880 bool TemplateURL::IsSearchURLUsingTermsData( 852 bool TemplateURL::IsSearchURLUsingTermsData(
881 const GURL& url, 853 const GURL& url,
882 const SearchTermsData& search_terms_data) { 854 const SearchTermsData& search_terms_data) {
883 string16 search_terms; 855 string16 search_terms;
(...skipping 13 matching lines...) Expand all
897 if (key.is_nonempty() && 869 if (key.is_nonempty() &&
898 params[i].substr(key.begin, key.len) == 870 params[i].substr(key.begin, key.len) ==
899 search_terms_replacement_key()) { 871 search_terms_replacement_key()) {
900 return true; 872 return true;
901 } 873 }
902 } 874 }
903 } 875 }
904 return false; 876 return false;
905 } 877 }
906 878
879 bool TemplateURL::ReplaceSearchTermsInURL(
880 const GURL& url,
881 const TemplateURLRef::SearchTermsArgs& search_terms_args,
882 GURL* result) {
883 UIThreadSearchTermsData search_terms_data(profile_);
884 // TODO(beaudoin): Use AQS from |search_terms_args| too.
885 url_parse::Parsed::ComponentType search_term_component;
886 url_parse::Component search_terms_position;
887 string16 search_terms;
888 if (!FindSearchTermsInURL(url, search_terms_data, &search_terms,
889 &search_term_component, &search_terms_position)) {
890 return false;
891 }
892 DCHECK(search_terms_position.is_nonempty());
893
894 // FindSearchTermsInURL only returns true for search terms in the query or
895 // ref, so we can call EncodeSearchTerms with |is_in_query| = true, since
896 // query and ref are encoded in the same way.
897 std::string input_encoding;
898 string16 encoded_terms;
899 string16 encoded_original_query;
900 EncodeSearchTerms(search_terms_args, true, &input_encoding,
901 &encoded_terms, &encoded_original_query);
902
903 std::string old_params((search_term_component == url_parse::Parsed::REF) ?
904 url.ref() : url.query());
905 std::string new_params(old_params, 0, search_terms_position.begin);
906 new_params += UTF16ToUTF8(search_terms_args.search_terms);
907 new_params += old_params.substr(search_terms_position.end());
908 url_canon::StdStringReplacements<std::string> replacements;
909 if (search_term_component == url_parse::Parsed::REF)
910 replacements.SetRefStr(new_params);
911 else
912 replacements.SetQueryStr(new_params);
913 *result = url.ReplaceComponents(replacements);
914 return true;
915 }
916
917 void TemplateURL::EncodeSearchTerms(
918 const TemplateURLRef::SearchTermsArgs& search_terms_args,
919 bool is_in_query,
920 std::string* input_encoding,
921 string16* encoded_terms,
922 string16* encoded_original_query) const {
923
924 std::vector<std::string> encodings(input_encodings());
925 if (std::find(encodings.begin(), encodings.end(), "UTF-8") == encodings.end())
926 encodings.push_back("UTF-8");
927 for (std::vector<std::string>::const_iterator i(encodings.begin());
928 i != encodings.end(); ++i) {
929 if (TryEncoding(search_terms_args.search_terms,
930 search_terms_args.original_query, i->c_str(),
931 is_in_query, encoded_terms, encoded_original_query)) {
932 *input_encoding = *i;
933 return;
934 }
935 }
936 NOTREACHED();
937 }
938
907 void TemplateURL::CopyFrom(const TemplateURL& other) { 939 void TemplateURL::CopyFrom(const TemplateURL& other) {
908 if (this == &other) 940 if (this == &other)
909 return; 941 return;
910 942
911 profile_ = other.profile_; 943 profile_ = other.profile_;
912 data_ = other.data_; 944 data_ = other.data_;
913 url_ref_.InvalidateCachedValues(); 945 url_ref_.InvalidateCachedValues();
914 suggestions_url_ref_.InvalidateCachedValues(); 946 suggestions_url_ref_.InvalidateCachedValues();
915 instant_url_ref_.InvalidateCachedValues(); 947 instant_url_ref_.InvalidateCachedValues();
916 SetPrepopulateId(other.data_.prepopulate_id); 948 SetPrepopulateId(other.data_.prepopulate_id);
(...skipping 13 matching lines...) Expand all
930 } 962 }
931 963
932 void TemplateURL::ResetKeywordIfNecessary(bool force) { 964 void TemplateURL::ResetKeywordIfNecessary(bool force) {
933 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { 965 if (IsGoogleSearchURLWithReplaceableKeyword() || force) {
934 DCHECK(!IsExtensionKeyword()); 966 DCHECK(!IsExtensionKeyword());
935 GURL url(TemplateURLService::GenerateSearchURL(this)); 967 GURL url(TemplateURLService::GenerateSearchURL(this));
936 if (url.is_valid()) 968 if (url.is_valid())
937 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); 969 data_.SetKeyword(TemplateURLService::GenerateKeyword(url));
938 } 970 }
939 } 971 }
972
973 bool TemplateURL::FindSearchTermsInURL(
974 const GURL& url,
975 const SearchTermsData& search_terms_data,
976 string16* search_terms,
977 url_parse::Parsed::ComponentType* search_term_component,
978 url_parse::Component* search_terms_position) {
979 DCHECK(search_terms);
980 search_terms->clear();
981
982 // Try to match with every pattern.
983 for (size_t i = 0; i < URLCount(); ++i) {
984 TemplateURLRef ref(this, i);
985 if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data,
986 search_term_component, search_terms_position)) {
987 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty
988 // it means the pattern matched but no search terms were present. In this
989 // case we fail immediately without looking for matches in subsequent
990 // patterns. This means that given patterns
991 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
992 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would
993 // return false. This is important for at least Google, where such URLs
994 // are invalid.
995 return !search_terms->empty();
996 }
997 }
998 return false;
999 }
OLDNEW
« no previous file with comments | « chrome/browser/search_engines/template_url.h ('k') | chrome/browser/search_engines/template_url_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698