Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(139)

Side by Side Diff: chrome/browser/search_engines/template_url.cc

Issue 12623029: Upstreaming mechanism to add query refinement to omnibox searches. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed ChromeOS. Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/search_engines/template_url.h" 5 #include "chrome/browser/search_engines/template_url.h"
6 6
7 #include "base/format_macros.h" 7 #include "base/format_macros.h"
8 #include "base/guid.h" 8 #include "base/guid.h"
9 #include "base/i18n/case_conversion.h" 9 #include "base/i18n/case_conversion.h"
10 #include "base/i18n/icu_string_conversions.h" 10 #include "base/i18n/icu_string_conversions.h"
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after
230 for (Replacements::iterator i = replacements_.begin(); 230 for (Replacements::iterator i = replacements_.begin();
231 i != replacements_.end(); ++i) { 231 i != replacements_.end(); ++i) {
232 if (i->type == SEARCH_TERMS) { 232 if (i->type == SEARCH_TERMS) {
233 string16::size_type query_start = parsed_url_.find('?'); 233 string16::size_type query_start = parsed_url_.find('?');
234 is_in_query = query_start != string16::npos && 234 is_in_query = query_start != string16::npos &&
235 (static_cast<string16::size_type>(i->index) > query_start); 235 (static_cast<string16::size_type>(i->index) > query_start);
236 break; 236 break;
237 } 237 }
238 } 238 }
239 239
240 std::string input_encoding;
240 string16 encoded_terms; 241 string16 encoded_terms;
241 string16 encoded_original_query; 242 string16 encoded_original_query;
242 std::string input_encoding; 243 owner_->EncodeSearchTerms(search_terms_args, is_in_query, &input_encoding,
243 // Encode the search terms so that we know the encoding. 244 &encoded_terms, &encoded_original_query);
244 for (std::vector<std::string>::const_iterator i(
245 owner_->input_encodings().begin());
246 i != owner_->input_encodings().end(); ++i) {
247 if (TryEncoding(search_terms_args.search_terms,
248 search_terms_args.original_query, i->c_str(),
249 is_in_query, &encoded_terms, &encoded_original_query)) {
250 input_encoding = *i;
251 break;
252 }
253 }
254 if (input_encoding.empty()) {
255 input_encoding = "UTF-8";
256 if (!TryEncoding(search_terms_args.search_terms,
257 search_terms_args.original_query,
258 input_encoding.c_str(), is_in_query, &encoded_terms,
259 &encoded_original_query))
260 NOTREACHED();
261 }
262 245
263 std::string url = parsed_url_; 246 std::string url = parsed_url_;
264 247
265 // replacements_ is ordered in ascending order, as such we need to iterate 248 // replacements_ is ordered in ascending order, as such we need to iterate
266 // from the back. 249 // from the back.
267 for (Replacements::reverse_iterator i = replacements_.rbegin(); 250 for (Replacements::reverse_iterator i = replacements_.rbegin();
268 i != replacements_.rend(); ++i) { 251 i != replacements_.rend(); ++i) {
269 switch (i->type) { 252 switch (i->type) {
270 case ENCODING: 253 case ENCODING:
271 url.insert(i->index, input_encoding); 254 url.insert(i->index, input_encoding);
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 if ((replacements_[i].type == GOOGLE_BASE_URL) || 448 if ((replacements_[i].type == GOOGLE_BASE_URL) ||
466 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) 449 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
467 return true; 450 return true;
468 } 451 }
469 return false; 452 return false;
470 } 453 }
471 454
472 bool TemplateURLRef::ExtractSearchTermsFromURL( 455 bool TemplateURLRef::ExtractSearchTermsFromURL(
473 const GURL& url, 456 const GURL& url,
474 string16* search_terms, 457 string16* search_terms,
475 const SearchTermsData& search_terms_data) const { 458 const SearchTermsData& search_terms_data,
459 url_parse::Parsed::ComponentType* search_terms_location,
460 url_parse::Component* search_terms_position) const {
476 DCHECK(search_terms); 461 DCHECK(search_terms);
477 search_terms->clear(); 462 search_terms->clear();
478 463
479 ParseIfNecessaryUsingTermsData(search_terms_data); 464 ParseIfNecessaryUsingTermsData(search_terms_data);
480 465
481 // We need a search term in the template URL to extract something. 466 // We need a search term in the template URL to extract something.
482 if (search_term_key_.empty()) 467 if (search_term_key_.empty())
483 return false; 468 return false;
484 469
485 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/ 470 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/
(...skipping 21 matching lines...) Expand all
507 &value)) { 492 &value)) {
508 if (key.is_nonempty()) { 493 if (key.is_nonempty()) {
509 if (params.substr(key.begin, key.len) == search_term_key_) { 494 if (params.substr(key.begin, key.len) == search_term_key_) {
510 // Extract the search term. 495 // Extract the search term.
511 *search_terms = net::UnescapeAndDecodeUTF8URLComponent( 496 *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
512 params.substr(value.begin, value.len), 497 params.substr(value.begin, value.len),
513 net::UnescapeRule::SPACES | 498 net::UnescapeRule::SPACES |
514 net::UnescapeRule::URL_SPECIAL_CHARS | 499 net::UnescapeRule::URL_SPECIAL_CHARS |
515 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE, 500 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
516 NULL); 501 NULL);
502 if (search_terms_location)
503 *search_terms_location = search_term_key_location_;
504 if (search_terms_position)
505 *search_terms_position = value;
517 return true; 506 return true;
518 } 507 }
519 } 508 }
520 } 509 }
521 return false; 510 return false;
522 } 511 }
523 512
524 void TemplateURLRef::InvalidateCachedValues() const { 513 void TemplateURLRef::InvalidateCachedValues() const {
525 supports_replacements_ = valid_ = parsed_ = false; 514 supports_replacements_ = valid_ = parsed_ = false;
526 host_.clear(); 515 host_.clear();
(...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after
837 string16* search_terms) { 826 string16* search_terms) {
838 UIThreadSearchTermsData search_terms_data(profile_); 827 UIThreadSearchTermsData search_terms_data(profile_);
839 return ExtractSearchTermsFromURLUsingTermsData(url, search_terms, 828 return ExtractSearchTermsFromURLUsingTermsData(url, search_terms,
840 search_terms_data); 829 search_terms_data);
841 } 830 }
842 831
843 bool TemplateURL::ExtractSearchTermsFromURLUsingTermsData( 832 bool TemplateURL::ExtractSearchTermsFromURLUsingTermsData(
844 const GURL& url, 833 const GURL& url,
845 string16* search_terms, 834 string16* search_terms,
846 const SearchTermsData& search_terms_data) { 835 const SearchTermsData& search_terms_data) {
847 DCHECK(search_terms); 836 return FindSearchTermsInURL(url, search_terms_data, search_terms, NULL, NULL);
848 search_terms->clear(); 837 }
849 838
850 // Then try to match with every pattern.
851 for (size_t i = 0; i < URLCount(); ++i) {
852 TemplateURLRef ref(this, i);
853 if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data)) {
854 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty
855 // it means the pattern matched but no search terms were present. In this
856 // case we fail immediately without looking for matches in subsequent
857 // patterns. This means that given patterns
858 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
859 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would
860 // return false. This is important for at least Google, where such URLs
861 // are invalid.
862 return !search_terms->empty();
863 }
864 }
865 return false;
866 }
867 839
868 bool TemplateURL::IsSearchURL(const GURL& url) { 840 bool TemplateURL::IsSearchURL(const GURL& url) {
869 UIThreadSearchTermsData search_terms_data(profile_); 841 UIThreadSearchTermsData search_terms_data(profile_);
870 return IsSearchURLUsingTermsData(url, search_terms_data); 842 return IsSearchURLUsingTermsData(url, search_terms_data);
871 } 843 }
872 844
873 bool TemplateURL::IsSearchURLUsingTermsData( 845 bool TemplateURL::IsSearchURLUsingTermsData(
874 const GURL& url, 846 const GURL& url,
875 const SearchTermsData& search_terms_data) { 847 const SearchTermsData& search_terms_data) {
876 string16 search_terms; 848 string16 search_terms;
(...skipping 13 matching lines...) Expand all
890 if (key.is_nonempty() && 862 if (key.is_nonempty() &&
891 params[i].substr(key.begin, key.len) == 863 params[i].substr(key.begin, key.len) ==
892 search_terms_replacement_key()) { 864 search_terms_replacement_key()) {
893 return true; 865 return true;
894 } 866 }
895 } 867 }
896 } 868 }
897 return false; 869 return false;
898 } 870 }
899 871
872 bool TemplateURL::ReplaceSearchTermsInURL(
873 const GURL& url,
874 const TemplateURLRef::SearchTermsArgs& search_terms_args,
875 GURL* result) {
876 UIThreadSearchTermsData search_terms_data(profile_);
877 // TODO(beaudoin): Use AQS from |search_terms_args| too.
878 url_parse::Parsed::ComponentType search_term_location;
879 url_parse::Component search_terms_position;
880 string16 search_terms;
881 if (!FindSearchTermsInURL(url, search_terms_data, &search_terms,
882 &search_term_location, &search_terms_position))
Peter Kasting 2013/03/22 20:20:06 Nit: Lines of args should all be aligned. Indent
beaudoin 2013/03/22 23:10:43 Done.
883 return false;
884 DCHECK(search_terms_position.is_nonempty());
885
886 // FindSearchTermsInURL only returns true for search terms in the query or
887 // ref, so we can call EncodeSearchTerm with |is_in_query| = true, since query
888 // and ref are encoded in the same way.
889 std::string input_encoding;
890 string16 encoded_terms;
891 string16 encoded_original_query;
892 EncodeSearchTerms(search_terms_args, true, &input_encoding,
893 &encoded_terms, &encoded_original_query);
894
895 url_canon::StdStringReplacements<std::string> replacements;
896 std::string new_params;
897 switch (search_term_location) {
Peter Kasting 2013/03/22 20:20:06 This switch can be eliminated, and the function sh
beaudoin 2013/03/22 23:10:43 Done.
898 case url_parse::Parsed::QUERY:
899 new_params += url.query().substr(0, search_terms_position.begin);
900 new_params += UTF16ToUTF8(search_terms_args.search_terms);
901 new_params.append(url.query().substr(search_terms_position.begin +
902 search_terms_position.len));
903 replacements.SetQueryStr(new_params);
904 break;
905
906 case url_parse::Parsed::REF:
907 new_params = url.ref().substr(0, search_terms_position.begin);
908 new_params += UTF16ToUTF8(search_terms_args.search_terms);
909 new_params += url.ref().substr(search_terms_position.begin +
910 search_terms_position.len);
911 replacements.SetRefStr(new_params);
912 break;
913
914 default:
915 NOTREACHED();
916 }
917
918 *result = url.ReplaceComponents(replacements);
919 return true;
920 }
921
922 void TemplateURL::EncodeSearchTerms(
923 const TemplateURLRef::SearchTermsArgs& search_terms_args,
924 bool is_in_query,
925 std::string* input_encoding,
926 string16* encoded_terms,
927 string16* encoded_original_query) const {
928 for (std::vector<std::string>::const_iterator i(input_encodings().begin());
929 i != input_encodings().end(); ++i) {
930 if (TryEncoding(search_terms_args.search_terms,
931 search_terms_args.original_query, i->c_str(),
932 is_in_query, encoded_terms, encoded_original_query)) {
933 *input_encoding = *i;
934 break;
935 }
936 }
937 if (input_encoding->empty()) {
Peter Kasting 2013/03/22 20:20:06 This last block can be avoided by rewriting the fu
beaudoin 2013/03/22 23:10:43 Done.
938 *input_encoding = "UTF-8";
939 if (!TryEncoding(search_terms_args.search_terms,
940 search_terms_args.original_query,
941 input_encoding->c_str(), is_in_query, encoded_terms,
942 encoded_original_query))
943 NOTREACHED();
944 }
945 }
946
900 void TemplateURL::CopyFrom(const TemplateURL& other) { 947 void TemplateURL::CopyFrom(const TemplateURL& other) {
901 if (this == &other) 948 if (this == &other)
902 return; 949 return;
903 950
904 profile_ = other.profile_; 951 profile_ = other.profile_;
905 data_ = other.data_; 952 data_ = other.data_;
906 url_ref_.InvalidateCachedValues(); 953 url_ref_.InvalidateCachedValues();
907 suggestions_url_ref_.InvalidateCachedValues(); 954 suggestions_url_ref_.InvalidateCachedValues();
908 instant_url_ref_.InvalidateCachedValues(); 955 instant_url_ref_.InvalidateCachedValues();
909 SetPrepopulateId(other.data_.prepopulate_id); 956 SetPrepopulateId(other.data_.prepopulate_id);
(...skipping 13 matching lines...) Expand all
923 } 970 }
924 971
925 void TemplateURL::ResetKeywordIfNecessary(bool force) { 972 void TemplateURL::ResetKeywordIfNecessary(bool force) {
926 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { 973 if (IsGoogleSearchURLWithReplaceableKeyword() || force) {
927 DCHECK(!IsExtensionKeyword()); 974 DCHECK(!IsExtensionKeyword());
928 GURL url(TemplateURLService::GenerateSearchURL(this)); 975 GURL url(TemplateURLService::GenerateSearchURL(this));
929 if (url.is_valid()) 976 if (url.is_valid())
930 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); 977 data_.SetKeyword(TemplateURLService::GenerateKeyword(url));
931 } 978 }
932 } 979 }
980
981 bool TemplateURL::FindSearchTermsInURL(
982 const GURL& url,
983 const SearchTermsData& search_terms_data,
984 string16* search_terms,
985 url_parse::Parsed::ComponentType* search_term_location,
986 url_parse::Component* search_terms_position) {
987 DCHECK(search_terms);
988 search_terms->clear();
989
990 // Try to match with every pattern.
991 for (size_t i = 0; i < URLCount(); ++i) {
992 TemplateURLRef ref(this, i);
993 if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data,
994 search_term_location, search_terms_position)) {
995 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty
996 // it means the pattern matched but no search terms were present. In this
997 // case we fail immediately without looking for matches in subsequent
998 // patterns. This means that given patterns
999 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
1000 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would
1001 // return false. This is important for at least Google, where such URLs
1002 // are invalid.
1003 return !search_terms->empty();
1004 }
1005 }
1006 return false;
1007 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698