Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(302)

Side by Side Diff: chrome/browser/search_engines/template_url.cc

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Removed version_46.sql, committed separately. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/search_engines/template_url.h" 5 #include "chrome/browser/search_engines/template_url.h"
6 6
7 #include "base/guid.h" 7 #include "base/guid.h"
8 #include "base/i18n/case_conversion.h" 8 #include "base/i18n/case_conversion.h"
9 #include "base/i18n/icu_string_conversions.h" 9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/i18n/rtl.h" 10 #include "base/i18n/rtl.h"
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
100 return true; 100 return true;
101 std::string encoded_original_query; 101 std::string encoded_original_query;
102 if (!base::UTF16ToCodepage(original_query, encoding, 102 if (!base::UTF16ToCodepage(original_query, encoding,
103 base::OnStringConversionError::SKIP, &encoded_original_query)) 103 base::OnStringConversionError::SKIP, &encoded_original_query))
104 return false; 104 return false;
105 *escaped_original_query = 105 *escaped_original_query =
106 UTF8ToUTF16(net::EscapeQueryParamValue(encoded_original_query, true)); 106 UTF8ToUTF16(net::EscapeQueryParamValue(encoded_original_query, true));
107 return true; 107 return true;
108 } 108 }
109 109
110 // Returns the key of the first key/value pair in |params| whose value contains
111 // a search terms placeholder (kSearchTermsParameterFull or
// kGoogleUnescapedSearchTermsParameterFull). |params| is a list of parameters
// coming from the URL query or ref. Returns an empty string when |params| is
// empty or when no such pair is found. Only the key is returned; the host is
// not extracted here.
112 std::string FindSearchTermsKey(const std::string& params) {
113 if (params.empty())
114 return std::string();
115 url_parse::Component query, key, value;
116 query.len = static_cast<int>(params.size());
117 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
118 &value)) {
// Pairs with an empty key or an empty value are ignored.
119 if (key.is_nonempty() && value.is_nonempty()) {
120 std::string value_string = params.substr(value.begin, value.len);
121 if (value_string.find(kSearchTermsParameterFull, 0) !=
122 std::string::npos ||
123 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
124 std::string::npos) {
125 return params.substr(key.begin, key.len);
126 }
127 }
128 }
129 return std::string();
130 }
131
110 } // namespace 132 } // namespace
111 133
112 134
113 // TemplateURLRef::SearchTermsArgs -------------------------------------------- 135 // TemplateURLRef::SearchTermsArgs --------------------------------------------
114 136
// Stores |search_terms| and initializes accepted_suggestion to
// NO_SUGGESTIONS_AVAILABLE.
115 TemplateURLRef::SearchTermsArgs::SearchTermsArgs(const string16& search_terms) 137 TemplateURLRef::SearchTermsArgs::SearchTermsArgs(const string16& search_terms)
116 : search_terms(search_terms), 138 : search_terms(search_terms),
117 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) { 139 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) {
118 } 140 }
119 141
120 142
121 // TemplateURLRef ------------------------------------------------------------- 143 // TemplateURLRef -------------------------------------------------------------
122 144
// Constructs a ref of the given |type| owned by |owner|. |owner| must be
// non-NULL and |type| must not be INDEXED (both are DCHECKed). All cached parse
// state starts out cleared; index_in_owner_ is set to -1.
// NOTE(review): -1 is presumably a "not applicable" sentinel for non-INDEXED
// refs -- confirm against the declaration in template_url.h.
123 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type) 145 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
124 : owner_(owner), 146 : owner_(owner),
125 type_(type), 147 type_(type),
148 index_in_owner_(-1),
126 parsed_(false), 149 parsed_(false),
127 valid_(false), 150 valid_(false),
128 supports_replacements_(false), 151 supports_replacements_(false),
152 search_term_key_location_(url_parse::Parsed::QUERY),
129 prepopulated_(false) { 153 prepopulated_(false) {
130 DCHECK(owner_); 154 DCHECK(owner_);
155 DCHECK_NE(INDEXED, type_);
156 }
157
// Constructs a ref of type INDEXED that refers to the URL at position
// |index_in_owner| in |owner|. |owner| must be non-NULL and |index_in_owner|
// must be less than owner->URLCount() (both are DCHECKed). All cached parse
// state starts out cleared.
158 TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner)
159 : owner_(owner),
160 type_(INDEXED),
161 index_in_owner_(index_in_owner),
162 parsed_(false),
163 valid_(false),
164 supports_replacements_(false),
165 search_term_key_location_(url_parse::Parsed::QUERY),
166 prepopulated_(false) {
167 DCHECK(owner_);
168 DCHECK_LT(index_in_owner_, owner_->URLCount());
131 } 169 }
132 170
// The destructor body is intentionally empty.
133 TemplateURLRef::~TemplateURLRef() { 171 TemplateURLRef::~TemplateURLRef() {
134 } 172 }
135 173
// Returns the URL string this ref points at, selected by |type_|: the owner's
// url(), suggestions_url() or instant_url() for SEARCH, SUGGEST and INSTANT,
// and owner_->GetURL(index_in_owner_) for INDEXED. Any other type hits
// NOTREACHED() and yields an empty string.
136 std::string TemplateURLRef::GetURL() const { 174 std::string TemplateURLRef::GetURL() const {
137 switch (type_) { 175 switch (type_) {
138 case SEARCH: return owner_->url(); 176 case SEARCH: return owner_->url();
139 case SUGGEST: return owner_->suggestions_url(); 177 case SUGGEST: return owner_->suggestions_url();
140 case INSTANT: return owner_->instant_url(); 178 case INSTANT: return owner_->instant_url();
179 case INDEXED: return owner_->GetURL(index_in_owner_);
141 default: NOTREACHED(); return std::string(); // NOLINT 180 default: NOTREACHED(); return std::string(); // NOLINT
142 } 181 }
143 } 182 }
144 183
// Convenience wrapper: builds a UIThreadSearchTermsData from the owner's
// profile and forwards to SupportsReplacementUsingTermsData().
145 bool TemplateURLRef::SupportsReplacement() const { 184 bool TemplateURLRef::SupportsReplacement() const {
146 UIThreadSearchTermsData search_terms_data(owner_->profile()); 185 UIThreadSearchTermsData search_terms_data(owner_->profile());
147 return SupportsReplacementUsingTermsData(search_terms_data); 186 return SupportsReplacementUsingTermsData(search_terms_data);
148 } 187 }
149 188
150 bool TemplateURLRef::SupportsReplacementUsingTermsData( 189 bool TemplateURLRef::SupportsReplacementUsingTermsData(
(...skipping 246 matching lines...) Expand 10 before | Expand all | Expand 10 after
// Returns true if, after parsing, at least one entry in |replacements_| has
// type GOOGLE_BASE_URL or GOOGLE_BASE_SUGGEST_URL; returns false otherwise.
397 bool TemplateURLRef::HasGoogleBaseURLs() const { 436 bool TemplateURLRef::HasGoogleBaseURLs() const {
398 ParseIfNecessary(); 437 ParseIfNecessary();
399 for (size_t i = 0; i < replacements_.size(); ++i) { 438 for (size_t i = 0; i < replacements_.size(); ++i) {
400 if ((replacements_[i].type == GOOGLE_BASE_URL) || 439 if ((replacements_[i].type == GOOGLE_BASE_URL) ||
401 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) 440 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
402 return true; 441 return true;
403 } 442 }
404 return false; 443 return false;
405 } 444 }
406 445
// Tries to extract the search terms from |url| using this ref as the pattern.
// |search_terms| must be non-NULL and is always cleared first.
//
// Returns true when |url| matches the pattern (scheme, port, host and path)
// and the parameter named |search_term_key_| is present in the query or ref,
// whichever |search_term_key_location_| designates. |search_terms| then holds
// the unescaped value of the first such parameter, which may be empty.
// Returns false otherwise, leaving |search_terms| empty.
446 bool TemplateURLRef::ExtractSearchTermsFromURL(const GURL& url,
447 string16* search_terms) const {
448 DCHECK(search_terms);
449 search_terms->clear();
450
451 ParseIfNecessary();
452
453 // We need a search term key in the template URL to extract something.
454 if (search_term_key_.empty())
455 return false;
456
457 // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/
458 // See crbug.com/153798
459
460 // Fill in the replacements. We don't care about search terms in the pattern,
461 // so we use the empty string.
462 GURL pattern(ReplaceSearchTerms(SearchTermsArgs(string16())));
463 // Scheme, host, path and port must match. Scheme and port are compared with
// the expanded |pattern|; host and path are compared with the cached |host_|
// and |path_|.
464 if (!url.SchemeIs(pattern.scheme().c_str()) ||
465 url.port() != pattern.port() ||
466 url.host() != host_ ||
467 url.path() != path_) {
468 return false;
469 }
470
471 // Parameter must be present either in the query or the ref.
472 const std::string& params(
473 (search_term_key_location_ == url_parse::Parsed::QUERY) ?
474 url.query() : url.ref());
475
476 url_parse::Component query, key, value;
477 query.len = static_cast<int>(params.size());
478 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
479 &value)) {
480 if (key.is_nonempty()) {
481 if (params.substr(key.begin, key.len) == search_term_key_) {
482 // Extract the search term. The first parameter whose key matches wins.
483 *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
484 params.substr(value.begin, value.len),
485 net::UnescapeRule::SPACES |
486 net::UnescapeRule::URL_SPECIAL_CHARS |
487 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
488 NULL);
489 return true;
490 }
491 }
492 }
493 return false;
494 }
495
// Drops all cached parse results: resets the |supports_replacements_|,
// |valid_| and |parsed_| flags to false and clears |host_|, |path_|,
// |search_term_key_| and |replacements_|.
407 void TemplateURLRef::InvalidateCachedValues() const { 496 void TemplateURLRef::InvalidateCachedValues() const {
408 supports_replacements_ = valid_ = parsed_ = false; 497 supports_replacements_ = valid_ = parsed_ = false;
409 host_.clear(); 498 host_.clear();
410 path_.clear(); 499 path_.clear();
411 search_term_key_.clear(); 500 search_term_key_.clear();
412 replacements_.clear(); 501 replacements_.clear();
413 } 502 }
414 503
415 bool TemplateURLRef::ParseParameter(size_t start, 504 bool TemplateURLRef::ParseParameter(size_t start,
416 size_t end, 505 size_t end,
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
// Computes |search_term_key_|, |search_term_key_location_|, |host_| and
// |path_| from this ref's URL. The rows below show the old revision (left)
// and the new revision (right) side by side; this description covers the new
// revision.
//
// The Google base URL placeholders in the URL are first replaced with the
// values from |search_terms_data|. The cached key, host and path are then
// cleared. If the resulting URL is invalid, or if a search terms key is found
// in neither or in both of the query and the ref, they stay empty. Otherwise
// the key, where it was found (QUERY or REF), and the URL's host and path are
// stored.
549 void TemplateURLRef::ParseHostAndSearchTermKey( 638 void TemplateURLRef::ParseHostAndSearchTermKey(
550 const SearchTermsData& search_terms_data) const { 639 const SearchTermsData& search_terms_data) const {
551 std::string url_string(GetURL()); 640 std::string url_string(GetURL());
552 ReplaceSubstringsAfterOffset(&url_string, 0, 641 ReplaceSubstringsAfterOffset(&url_string, 0,
553 kGoogleBaseURLParameterFull, 642 kGoogleBaseURLParameterFull,
554 search_terms_data.GoogleBaseURLValue()); 643 search_terms_data.GoogleBaseURLValue());
555 ReplaceSubstringsAfterOffset(&url_string, 0, 644 ReplaceSubstringsAfterOffset(&url_string, 0,
556 kGoogleBaseSuggestURLParameterFull, 645 kGoogleBaseSuggestURLParameterFull,
557 search_terms_data.GoogleBaseSuggestURLValue()); 646 search_terms_data.GoogleBaseSuggestURLValue());
558 647
648 search_term_key_.clear();
649 host_.clear();
650 path_.clear();
// NOTE(review): the reset value here is REF while both constructors
// initialize this member to QUERY. This looks harmless because the key is
// empty in that state, but confirm the asymmetry is intended.
651 search_term_key_location_ = url_parse::Parsed::REF;
652
559 GURL url(url_string); 653 GURL url(url_string);
560 if (!url.is_valid()) 654 if (!url.is_valid())
561 return; 655 return;
562 656
563 std::string query_string = url.query(); 657 std::string query_key = FindSearchTermsKey(url.query());
564 if (query_string.empty()) 658 std::string ref_key = FindSearchTermsKey(url.ref());
565 return; 659 if (query_key.empty() == ref_key.empty())
566 660 return; // No key found, or a key found in both the query and the ref. We only handle a key in exactly one of them.
567 url_parse::Component query, key, value; 661 search_term_key_ = query_key.empty() ? ref_key : query_key;
568 query.len = static_cast<int>(query_string.size()); 662 search_term_key_location_ = query_key.empty() ?
569 while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key, 663 url_parse::Parsed::REF : url_parse::Parsed::QUERY;
570 &value)) { 664 host_ = url.host();
571 if (key.is_nonempty() && value.is_nonempty()) { 665 path_ = url.path();
572 std::string value_string = query_string.substr(value.begin, value.len);
573 if (value_string.find(kSearchTermsParameterFull, 0) !=
574 std::string::npos ||
575 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
576 std::string::npos) {
577 search_term_key_ = query_string.substr(key.begin, key.len);
578 host_ = url.host();
579 path_ = url.path();
580 break;
581 }
582 }
583 }
584 } 666 }
585 667
586 668
587 // TemplateURLData ------------------------------------------------------------ 669 // TemplateURLData ------------------------------------------------------------
588 670
589 TemplateURLData::TemplateURLData() 671 TemplateURLData::TemplateURLData()
590 : show_in_default_list(false), 672 : show_in_default_list(false),
591 safe_for_autoreplace(false), 673 safe_for_autoreplace(false),
592 id(0), 674 id(0),
593 date_created(base::Time::Now()), 675 date_created(base::Time::Now()),
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
683 765
// Returns the host component of this TemplateURL's URL. Must only be called
// when IsExtensionKeyword() is true (DCHECKed).
684 std::string TemplateURL::GetExtensionId() const { 766 std::string TemplateURL::GetExtensionId() const {
685 DCHECK(IsExtensionKeyword()); 767 DCHECK(IsExtensionKeyword());
686 return GURL(data_.url()).host(); 768 return GURL(data_.url()).host();
687 } 769 }
688 770
// Returns true if this TemplateURL's URL uses the chrome::kExtensionScheme
// scheme.
689 bool TemplateURL::IsExtensionKeyword() const { 771 bool TemplateURL::IsExtensionKeyword() const {
690 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme); 772 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme);
691 } 773 }
692 774
// Returns the number of URLs addressable through GetURL(index): every
// alternate URL plus the regular search URL.
775 size_t TemplateURL::URLCount() const {
776 // Add 1 for the regular search URL.
777 return data_.alternate_urls.size() + 1;
778 }
779
// Returns the URL at |index|, which must be less than URLCount() (DCHECKed).
// Indices below data_.alternate_urls.size() select the alternate URL at that
// position; the one remaining index (equal to alternate_urls.size()) selects
// the regular search URL returned by url().
780 const std::string& TemplateURL::GetURL(size_t index) const {
781 DCHECK_LT(index, URLCount());
782
783 return (index < data_.alternate_urls.size()) ?
784 data_.alternate_urls[index] : url();
785 }
786
// Tries to extract search terms from |url| by matching it against each of
// this TemplateURL's URL patterns, in index order. |search_terms| must be
// non-NULL and is cleared first. Returns true only if the first pattern that
// matches yields non-empty search terms; returns false if no pattern matches
// or if the first matching pattern yields empty terms.
787 bool TemplateURL::ExtractSearchTermsFromURL(
788 const GURL& url, string16* search_terms) {
789 DCHECK(search_terms);
790 search_terms->clear();
791
792 // Try to match |url| against every pattern, stopping at the first match.
793 for (size_t i = 0; i < URLCount(); ++i) {
794 TemplateURLRef ref(this, i);
795 if (ref.ExtractSearchTermsFromURL(url, search_terms)) {
796 // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty
797 // it means the pattern matched but no search terms were present. In this
798 // case we fail immediately without looking for matches in subsequent
799 // patterns. This means that given patterns
800 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
801 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would
802 // return false. This is important for at least Google, where such URLs
803 // are invalid.
804 return !search_terms->empty();
805 }
806 }
807 return false;
808 }
809
693 void TemplateURL::CopyFrom(const TemplateURL& other) { 810 void TemplateURL::CopyFrom(const TemplateURL& other) {
694 if (this == &other) 811 if (this == &other)
695 return; 812 return;
696 813
697 profile_ = other.profile_; 814 profile_ = other.profile_;
698 data_ = other.data_; 815 data_ = other.data_;
699 url_ref_.InvalidateCachedValues(); 816 url_ref_.InvalidateCachedValues();
700 suggestions_url_ref_.InvalidateCachedValues(); 817 suggestions_url_ref_.InvalidateCachedValues();
701 instant_url_ref_.InvalidateCachedValues(); 818 instant_url_ref_.InvalidateCachedValues();
702 SetPrepopulateId(other.data_.prepopulate_id); 819 SetPrepopulateId(other.data_.prepopulate_id);
(...skipping 13 matching lines...) Expand all
716 } 833 }
717 834
// Regenerates the keyword when |force| is true or when
// IsGoogleSearchURLWithReplaceableKeyword() returns true. The new keyword is
// derived from the search URL generated for this TemplateURL, and is only set
// if that URL is valid. Must not be used on extension keywords (DCHECKed).
718 void TemplateURL::ResetKeywordIfNecessary(bool force) { 835 void TemplateURL::ResetKeywordIfNecessary(bool force) {
719 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { 836 if (IsGoogleSearchURLWithReplaceableKeyword() || force) {
720 DCHECK(!IsExtensionKeyword()); 837 DCHECK(!IsExtensionKeyword());
721 GURL url(TemplateURLService::GenerateSearchURL(this)); 838 GURL url(TemplateURLService::GenerateSearchURL(this));
722 if (url.is_valid()) 839 if (url.is_valid())
723 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); 840 data_.SetKeyword(TemplateURLService::GenerateKeyword(url));
724 } 841 }
725 } 842 }
OLDNEW
« no previous file with comments | « chrome/browser/search_engines/template_url.h ('k') | chrome/browser/search_engines/template_url_prepopulate_data.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698