Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(458)

Side by Side Diff: chrome/browser/search_engines/template_url.cc

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Answered code review comments. Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/search_engines/template_url.h" 5 #include "chrome/browser/search_engines/template_url.h"
6 6
7 #include "base/guid.h" 7 #include "base/guid.h"
8 #include "base/i18n/case_conversion.h" 8 #include "base/i18n/case_conversion.h"
9 #include "base/i18n/icu_string_conversions.h" 9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/i18n/rtl.h" 10 #include "base/i18n/rtl.h"
11 #include "base/logging.h" 11 #include "base/logging.h"
12 #include "base/metrics/field_trial.h" 12 #include "base/metrics/field_trial.h"
13 #include "base/string_number_conversions.h" 13 #include "base/string_number_conversions.h"
14 #include "base/string_split.h"
14 #include "base/string_util.h" 15 #include "base/string_util.h"
15 #include "base/stringprintf.h" 16 #include "base/stringprintf.h"
16 #include "base/utf_string_conversions.h" 17 #include "base/utf_string_conversions.h"
17 #include "chrome/browser/autocomplete/autocomplete_field_trial.h" 18 #include "chrome/browser/autocomplete/autocomplete_field_trial.h"
18 #include "chrome/browser/google/google_util.h" 19 #include "chrome/browser/google/google_util.h"
19 #include "chrome/browser/search_engines/search_terms_data.h" 20 #include "chrome/browser/search_engines/search_terms_data.h"
20 #include "chrome/browser/search_engines/template_url_service.h" 21 #include "chrome/browser/search_engines/template_url_service.h"
21 #include "chrome/common/url_constants.h" 22 #include "chrome/common/url_constants.h"
22 #include "google_apis/google_api_keys.h" 23 #include "google_apis/google_api_keys.h"
23 #include "net/base/escape.h" 24 #include "net/base/escape.h"
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 : search_terms(search_terms), 117 : search_terms(search_terms),
117 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) { 118 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) {
118 } 119 }
119 120
120 121
121 // TemplateURLRef ------------------------------------------------------------- 122 // TemplateURLRef -------------------------------------------------------------
122 123
123 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type) 124 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
124 : owner_(owner), 125 : owner_(owner),
125 type_(type), 126 type_(type),
127 index_in_owner_(-1),
126 parsed_(false), 128 parsed_(false),
127 valid_(false), 129 valid_(false),
128 supports_replacements_(false), 130 supports_replacements_(false),
131 search_term_key_location_(url_parse::Parsed::QUERY),
129 prepopulated_(false) { 132 prepopulated_(false) {
130 DCHECK(owner_); 133 DCHECK(owner_);
134 DCHECK(type_ != INDEXED);
135 }
136
137 TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner)
138 : owner_(owner),
139 type_(INDEXED),
140 index_in_owner_(index_in_owner),
141 parsed_(false),
142 valid_(false),
143 supports_replacements_(false),
144 search_term_key_location_(url_parse::Parsed::QUERY),
145 prepopulated_(false) {
146 DCHECK(owner_);
147 DCHECK(index_in_owner_ >= 0L && index_in_owner_ < owner_->URLCount());
131 } 148 }
132 149
133 TemplateURLRef::~TemplateURLRef() { 150 TemplateURLRef::~TemplateURLRef() {
134 } 151 }
135 152
136 std::string TemplateURLRef::GetURL() const { 153 std::string TemplateURLRef::GetURL() const {
137 switch (type_) { 154 switch (type_) {
138 case SEARCH: return owner_->url(); 155 case SEARCH: return owner_->url();
139 case SUGGEST: return owner_->suggestions_url(); 156 case SUGGEST: return owner_->suggestions_url();
140 case INSTANT: return owner_->instant_url(); 157 case INSTANT: return owner_->instant_url();
158 case INDEXED: return owner_->GetURL(index_in_owner_);
141 default: NOTREACHED(); return std::string(); // NOLINT 159 default: NOTREACHED(); return std::string(); // NOLINT
142 } 160 }
143 } 161 }
144 162
145 bool TemplateURLRef::SupportsReplacement() const { 163 bool TemplateURLRef::SupportsReplacement() const {
146 UIThreadSearchTermsData search_terms_data(owner_->profile()); 164 UIThreadSearchTermsData search_terms_data(owner_->profile());
147 return SupportsReplacementUsingTermsData(search_terms_data); 165 return SupportsReplacementUsingTermsData(search_terms_data);
148 } 166 }
149 167
150 bool TemplateURLRef::SupportsReplacementUsingTermsData( 168 bool TemplateURLRef::SupportsReplacementUsingTermsData(
(...skipping 246 matching lines...) Expand 10 before | Expand all | Expand 10 after
397 bool TemplateURLRef::HasGoogleBaseURLs() const { 415 bool TemplateURLRef::HasGoogleBaseURLs() const {
398 ParseIfNecessary(); 416 ParseIfNecessary();
399 for (size_t i = 0; i < replacements_.size(); ++i) { 417 for (size_t i = 0; i < replacements_.size(); ++i) {
400 if ((replacements_[i].type == GOOGLE_BASE_URL) || 418 if ((replacements_[i].type == GOOGLE_BASE_URL) ||
401 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) 419 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
402 return true; 420 return true;
403 } 421 }
404 return false; 422 return false;
405 } 423 }
406 424
425
426 bool TemplateURLRef::ExtractSearchTermsFromURL(
427 const GURL& url, string16* search_terms) const {
428 DCHECK(search_terms);
429 search_terms->clear();
430
431 ParseIfNecessary();
432
433 // We need a search term in the template URL to extract something.
434 if (search_term_key_.empty())
435 return false;
436
437 // TODO(beaudoin): Support {Anything} parameter to act as a path wildcard.
438 // See http://crbug.com/139176.
439
440 // Fill in the replacements. We don't care about search terms in the pattern,
441 // so we use the empty string.
442 GURL pattern(ReplaceSearchTerms(SearchTermsArgs(string16())));
443 // Scheme, host, path and port must match.
444 if (!url.SchemeIs(pattern.scheme().c_str()) ||
445 url.port() != pattern.port() ||
446 url.host() != host_ ||
447 url.path() != path_) {
448 return false;
449 }
450
451 // Parameter must be present either in the query or the ref.
452 std::string params;
453 switch (search_term_key_location_) {
454 case url_parse::Parsed::QUERY:
455 params = url.query();
456 break;
457 case url_parse::Parsed::REF:
458 params = url.ref();
459 break;
460 default:
461 NOTREACHED();
462 return false;
463 }
464
465 url_parse::Component query, key, value;
466 query.len = static_cast<int>(params.size());
467 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
468 &value)) {
469 if (key.is_nonempty()) {
470 if (params.substr(key.begin, key.len) == search_term_key_) {
471 // Extract the search term.
472 *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
473 params.substr(value.begin, value.len),
474 net::UnescapeRule::SPACES |
475 net::UnescapeRule::URL_SPECIAL_CHARS |
476 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
477 NULL);
478 return true;
479 }
480 }
481 }
482 return false;
483 }
484
407 void TemplateURLRef::InvalidateCachedValues() const { 485 void TemplateURLRef::InvalidateCachedValues() const {
408 supports_replacements_ = valid_ = parsed_ = false; 486 supports_replacements_ = valid_ = parsed_ = false;
409 host_.clear(); 487 host_.clear();
410 path_.clear(); 488 path_.clear();
411 search_term_key_.clear(); 489 search_term_key_.clear();
412 replacements_.clear(); 490 replacements_.clear();
413 } 491 }
414 492
415 bool TemplateURLRef::ParseParameter(size_t start, 493 bool TemplateURLRef::ParseParameter(size_t start,
416 size_t end, 494 size_t end,
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
549 void TemplateURLRef::ParseHostAndSearchTermKey( 627 void TemplateURLRef::ParseHostAndSearchTermKey(
550 const SearchTermsData& search_terms_data) const { 628 const SearchTermsData& search_terms_data) const {
551 std::string url_string(GetURL()); 629 std::string url_string(GetURL());
552 ReplaceSubstringsAfterOffset(&url_string, 0, 630 ReplaceSubstringsAfterOffset(&url_string, 0,
553 kGoogleBaseURLParameterFull, 631 kGoogleBaseURLParameterFull,
554 search_terms_data.GoogleBaseURLValue()); 632 search_terms_data.GoogleBaseURLValue());
555 ReplaceSubstringsAfterOffset(&url_string, 0, 633 ReplaceSubstringsAfterOffset(&url_string, 0,
556 kGoogleBaseSuggestURLParameterFull, 634 kGoogleBaseSuggestURLParameterFull,
557 search_terms_data.GoogleBaseSuggestURLValue()); 635 search_terms_data.GoogleBaseSuggestURLValue());
558 636
637 search_term_key_.clear();
638 host_.clear();
639 path_.clear();
640 search_term_key_location_ = url_parse::Parsed::REF;
641
559 GURL url(url_string); 642 GURL url(url_string);
560 if (!url.is_valid()) 643 if (!url.is_valid())
561 return; 644 return;
562 645
563 std::string query_string = url.query(); 646 // We want to prioritize search terms in the ref rather than ones in the
564 if (query_string.empty()) 647 // query.
565 return; 648 if (!url.ref().empty())
649 FindSearchTermsKey(url.ref());
566 650
651 // If not found in ref string, look for them in query.
652 if (search_term_key_.empty() && !url.query().empty()) {
653 search_term_key_location_ = url_parse::Parsed::QUERY;
654 FindSearchTermsKey(url.query());
655 }
656
657 if (!search_term_key_.empty()) {
658 host_ = url.host();
659 path_ = url.path();
660 }
661 }
662
663 void TemplateURLRef::FindSearchTermsKey(const std::string& params) const {
567 url_parse::Component query, key, value; 664 url_parse::Component query, key, value;
568 query.len = static_cast<int>(query_string.size()); 665 query.len = static_cast<int>(params.size());
569 while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key, 666 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
570 &value)) { 667 &value)) {
571 if (key.is_nonempty() && value.is_nonempty()) { 668 if (key.is_nonempty() && value.is_nonempty()) {
572 std::string value_string = query_string.substr(value.begin, value.len); 669 std::string value_string = params.substr(value.begin, value.len);
573 if (value_string.find(kSearchTermsParameterFull, 0) != 670 if (value_string.find(kSearchTermsParameterFull, 0) !=
574 std::string::npos || 671 std::string::npos ||
575 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) != 672 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
576 std::string::npos) { 673 std::string::npos) {
577 search_term_key_ = query_string.substr(key.begin, key.len); 674 search_term_key_ = params.substr(key.begin, key.len);
578 host_ = url.host();
579 path_ = url.path();
580 break; 675 break;
581 } 676 }
582 } 677 }
583 } 678 }
584 } 679 }
585 680
586 681
587 // TemplateURLData ------------------------------------------------------------ 682 // TemplateURLData ------------------------------------------------------------
588 683
589 TemplateURLData::TemplateURLData() 684 TemplateURLData::TemplateURLData()
(...skipping 19 matching lines...) Expand all
609 // Case sensitive keyword matching is confusing. As such, we force all 704 // Case sensitive keyword matching is confusing. As such, we force all
610 // keywords to be lower case. 705 // keywords to be lower case.
611 keyword_ = base::i18n::ToLower(keyword); 706 keyword_ = base::i18n::ToLower(keyword);
612 } 707 }
613 708
614 void TemplateURLData::SetURL(const std::string& url) { 709 void TemplateURLData::SetURL(const std::string& url) {
615 DCHECK(!url.empty()); 710 DCHECK(!url.empty());
616 url_ = url; 711 url_ = url;
617 } 712 }
618 713
714 std::string TemplateURLData::SerializeAlternateURLs() const {
715 std::string result;
716 for (size_t i = 0; i < alternate_urls_.size(); ++i) {
717 // Sanity check that the URL doesn't contain a comma.
718 DCHECK(alternate_urls_[i].find(',') == std::string::npos);
719 if (result.length() != 0)
720 result.append(",");
721 result.append(alternate_urls_[i]);
722 }
723 return result;
724 }
725
726 void TemplateURLData::DeserializeAndSetAlternateURLs(
727 const std::string& alternate_urls) {
728 base::SplitString(alternate_urls, ',', &alternate_urls_);
729 }
619 730
620 // TemplateURL ---------------------------------------------------------------- 731 // TemplateURL ----------------------------------------------------------------
621 732
622 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data) 733 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data)
623 : profile_(profile), 734 : profile_(profile),
624 data_(data), 735 data_(data),
625 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH), 736 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH),
626 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), 737 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this),
627 TemplateURLRef::SUGGEST), 738 TemplateURLRef::SUGGEST),
628 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), 739 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this),
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
683 794
684 std::string TemplateURL::GetExtensionId() const { 795 std::string TemplateURL::GetExtensionId() const {
685 DCHECK(IsExtensionKeyword()); 796 DCHECK(IsExtensionKeyword());
686 return GURL(data_.url()).host(); 797 return GURL(data_.url()).host();
687 } 798 }
688 799
689 bool TemplateURL::IsExtensionKeyword() const { 800 bool TemplateURL::IsExtensionKeyword() const {
690 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme); 801 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme);
691 } 802 }
692 803
804 size_t TemplateURL::URLCount() const {
805 DCHECK(!url().empty());
806 // Add 1 for the regular search URL.
807 return data_.alternate_urls().size() + 1;
808 }
809
810 const std::string& TemplateURL::GetURL(size_t index) const {
811 DCHECK(!url().empty());
812 DCHECK(index >= 0 && index < URLCount());
813
814 if (index < data_.alternate_urls().size())
815 return data_.alternate_urls()[index];
816 return url();
817 }
818
819 bool TemplateURL::ExtractSearchTermsFromInstantExtendedURL(
820 const GURL& url, string16* search_terms) {
821 DCHECK(search_terms);
822 search_terms->clear();
823
824 // Ensure this is an instant extended URL.
825 std::string params = url.query();
826 url_parse::Component query, key, value;
827 query.len = static_cast<int>(params.size());
828 bool is_instant_extended = false;
829 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
830 &value)) {
831 if (!params.compare(key.begin, key.len,
832 google_util::kInstantExtendedAPIParam)) {
833 // If the parameter key is |kInstantExtendedAPIParam| and the value is
834 // not 0, this is an Instant Extended API search URL.
835 int int_value = 0;
836 if (value.is_nonempty())
837 base::StringToInt(params.substr(value.begin, value.len), &int_value);
838 if (int_value == 0)
839 return false;
840 is_instant_extended = true;
841 break;
842 }
843 }
844 if (!is_instant_extended)
845 return false;
846
847 // Then try to match with every pattern.
848 for (size_t i = 0; i < URLCount(); ++i) {
849 TemplateURLRef ref(this, i);
850 if (ref.ExtractSearchTermsFromURL(url, search_terms)) {
851 // Stop at the first pattern that matches, but never accept an empty
852 // string as a valid result. This ensures 'http://google.com/?q=foo#q=' fails.
853 return !search_terms->empty();
854 }
855 }
856 return false;
857 }
858
693 void TemplateURL::CopyFrom(const TemplateURL& other) { 859 void TemplateURL::CopyFrom(const TemplateURL& other) {
694 if (this == &other) 860 if (this == &other)
695 return; 861 return;
696 862
697 profile_ = other.profile_; 863 profile_ = other.profile_;
698 data_ = other.data_; 864 data_ = other.data_;
699 url_ref_.InvalidateCachedValues(); 865 url_ref_.InvalidateCachedValues();
700 suggestions_url_ref_.InvalidateCachedValues(); 866 suggestions_url_ref_.InvalidateCachedValues();
701 instant_url_ref_.InvalidateCachedValues(); 867 instant_url_ref_.InvalidateCachedValues();
702 SetPrepopulateId(other.data_.prepopulate_id); 868 SetPrepopulateId(other.data_.prepopulate_id);
(...skipping 13 matching lines...) Expand all
716 } 882 }
717 883
718 void TemplateURL::ResetKeywordIfNecessary(bool force) { 884 void TemplateURL::ResetKeywordIfNecessary(bool force) {
719 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { 885 if (IsGoogleSearchURLWithReplaceableKeyword() || force) {
720 DCHECK(!IsExtensionKeyword()); 886 DCHECK(!IsExtensionKeyword());
721 GURL url(TemplateURLService::GenerateSearchURL(this)); 887 GURL url(TemplateURLService::GenerateSearchURL(this));
722 if (url.is_valid()) 888 if (url.is_valid())
723 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); 889 data_.SetKeyword(TemplateURLService::GenerateKeyword(url));
724 } 890 }
725 } 891 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698