Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(171)

Side by Side Diff: chrome/browser/search_engines/template_url.cc

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed dominich comments. Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/search_engines/template_url.h" 5 #include "chrome/browser/search_engines/template_url.h"
6 6
7 #include "base/guid.h" 7 #include "base/guid.h"
8 #include "base/i18n/case_conversion.h" 8 #include "base/i18n/case_conversion.h"
9 #include "base/i18n/icu_string_conversions.h" 9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/i18n/rtl.h" 10 #include "base/i18n/rtl.h"
11 #include "base/logging.h" 11 #include "base/logging.h"
12 #include "base/metrics/field_trial.h" 12 #include "base/metrics/field_trial.h"
13 #include "base/string_number_conversions.h" 13 #include "base/string_number_conversions.h"
14 #include "base/string_split.h"
14 #include "base/string_util.h" 15 #include "base/string_util.h"
15 #include "base/stringprintf.h" 16 #include "base/stringprintf.h"
16 #include "base/utf_string_conversions.h" 17 #include "base/utf_string_conversions.h"
17 #include "chrome/browser/autocomplete/autocomplete_field_trial.h" 18 #include "chrome/browser/autocomplete/autocomplete_field_trial.h"
18 #include "chrome/browser/google/google_util.h" 19 #include "chrome/browser/google/google_util.h"
19 #include "chrome/browser/search_engines/search_terms_data.h" 20 #include "chrome/browser/search_engines/search_terms_data.h"
20 #include "chrome/browser/search_engines/template_url_service.h" 21 #include "chrome/browser/search_engines/template_url_service.h"
21 #include "chrome/common/url_constants.h" 22 #include "chrome/common/url_constants.h"
22 #include "net/base/escape.h" 23 #include "net/base/escape.h"
23 #include "ui/base/l10n/l10n_util.h" 24 #include "ui/base/l10n/l10n_util.h"
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
114 : search_terms(search_terms), 115 : search_terms(search_terms),
115 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) { 116 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) {
116 } 117 }
117 118
118 119
119 // TemplateURLRef ------------------------------------------------------------- 120 // TemplateURLRef -------------------------------------------------------------
120 121
121 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type) 122 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
122 : owner_(owner), 123 : owner_(owner),
123 type_(type), 124 type_(type),
125 index_in_owner_(-1),
124 parsed_(false), 126 parsed_(false),
125 valid_(false), 127 valid_(false),
126 supports_replacements_(false), 128 supports_replacements_(false),
129 search_term_key_location_(url_parse::Parsed::QUERY),
127 prepopulated_(false) { 130 prepopulated_(false) {
128 DCHECK(owner_); 131 DCHECK(owner_);
132 DCHECK(type_ != INDEXED);
133 }
134
135 TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner)
136 : owner_(owner),
137 type_(INDEXED),
138 index_in_owner_(index_in_owner),
139 parsed_(false),
140 valid_(false),
141 supports_replacements_(false),
142 search_term_key_location_(url_parse::Parsed::QUERY),
143 prepopulated_(false) {
144 DCHECK(owner_);
145 DCHECK(index_in_owner_ >= 0L && index_in_owner_ < owner_->URLCount());
129 } 146 }
130 147
131 TemplateURLRef::~TemplateURLRef() { 148 TemplateURLRef::~TemplateURLRef() {
132 } 149 }
133 150
134 std::string TemplateURLRef::GetURL() const { 151 std::string TemplateURLRef::GetURL() const {
135 switch (type_) { 152 switch (type_) {
136 case SEARCH: return owner_->url(); 153 case SEARCH: return owner_->url();
137 case SUGGEST: return owner_->suggestions_url(); 154 case SUGGEST: return owner_->suggestions_url();
138 case INSTANT: return owner_->instant_url(); 155 case INSTANT: return owner_->instant_url();
139 default: NOTREACHED(); return std::string(); 156 case INDEXED: return owner_->GetURL(index_in_owner_);
157 default:
158 NOTREACHED();
159 return std::string();
140 } 160 }
141 } 161 }
142 162
143 bool TemplateURLRef::SupportsReplacement() const { 163 bool TemplateURLRef::SupportsReplacement() const {
144 UIThreadSearchTermsData search_terms_data(owner_->profile()); 164 UIThreadSearchTermsData search_terms_data(owner_->profile());
145 return SupportsReplacementUsingTermsData(search_terms_data); 165 return SupportsReplacementUsingTermsData(search_terms_data);
146 } 166 }
147 167
148 bool TemplateURLRef::SupportsReplacementUsingTermsData( 168 bool TemplateURLRef::SupportsReplacementUsingTermsData(
149 const SearchTermsData& search_terms_data) const { 169 const SearchTermsData& search_terms_data) const {
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after
395 bool TemplateURLRef::HasGoogleBaseURLs() const { 415 bool TemplateURLRef::HasGoogleBaseURLs() const {
396 ParseIfNecessary(); 416 ParseIfNecessary();
397 for (size_t i = 0; i < replacements_.size(); ++i) { 417 for (size_t i = 0; i < replacements_.size(); ++i) {
398 if ((replacements_[i].type == GOOGLE_BASE_URL) || 418 if ((replacements_[i].type == GOOGLE_BASE_URL) ||
399 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) 419 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
400 return true; 420 return true;
401 } 421 }
402 return false; 422 return false;
403 } 423 }
404 424
425 string16 TemplateURLRef::ExtractSearchTermsFromURL(const GURL& url) const {
426 ParseIfNecessary();
427
428 // We need a search term in the template URL to extract something.
429 if (search_term_key_.empty())
430 return string16();
431
432 // TODO(beaudoin): Support {Anything} parameter to act as a path wildcard.
433 // See crbug/139176
434
435 // Fill-in the replacements. We don't care about search terms in the pattern,
436 // so we use the empty string.
437 GURL pattern(ReplaceSearchTerms(SearchTermsArgs(string16())));
438 // Scheme, host, path and port must match.
439 if (!url.SchemeIs(pattern.scheme().c_str()) ||
440 url.port() != pattern.port() ||
441 url.host() != host_ ||
442 url.path() != path_) {
443 return string16();
444 }
445
446 // Parameter must be present either in the query or the ref.
447 std::string params;
448 switch (search_term_key_location_) {
449 case url_parse::Parsed::QUERY:
450 params = url.query();
451 break;
452 case url_parse::Parsed::REF:
453 params = url.ref();
454 break;
455 default:
456 NOTREACHED();
457 return string16();
458 }
459
460 url_parse::Component query, key, value;
461 query.len = static_cast<int>(params.size());
462 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
463 &value)) {
464 if (key.is_nonempty() && value.is_nonempty()) {
465 if (params.substr(key.begin, key.len) == search_term_key_) {
466 // Extract the search term.
467 return net::UnescapeAndDecodeUTF8URLComponent(
468 params.substr(value.begin, value.len),
469 net::UnescapeRule::SPACES |
470 net::UnescapeRule::URL_SPECIAL_CHARS |
471 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
472 NULL);
473 }
474 }
475 }
476 return string16();
477 }
478
405 void TemplateURLRef::InvalidateCachedValues() const { 479 void TemplateURLRef::InvalidateCachedValues() const {
406 supports_replacements_ = valid_ = parsed_ = false; 480 supports_replacements_ = valid_ = parsed_ = false;
407 host_.clear(); 481 host_.clear();
408 path_.clear(); 482 path_.clear();
409 search_term_key_.clear(); 483 search_term_key_.clear();
410 replacements_.clear(); 484 replacements_.clear();
411 } 485 }
412 486
413 bool TemplateURLRef::ParseParameter(size_t start, 487 bool TemplateURLRef::ParseParameter(size_t start,
414 size_t end, 488 size_t end,
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after
544 void TemplateURLRef::ParseHostAndSearchTermKey( 618 void TemplateURLRef::ParseHostAndSearchTermKey(
545 const SearchTermsData& search_terms_data) const { 619 const SearchTermsData& search_terms_data) const {
546 std::string url_string(GetURL()); 620 std::string url_string(GetURL());
547 ReplaceSubstringsAfterOffset(&url_string, 0, 621 ReplaceSubstringsAfterOffset(&url_string, 0,
548 kGoogleBaseURLParameterFull, 622 kGoogleBaseURLParameterFull,
549 search_terms_data.GoogleBaseURLValue()); 623 search_terms_data.GoogleBaseURLValue());
550 ReplaceSubstringsAfterOffset(&url_string, 0, 624 ReplaceSubstringsAfterOffset(&url_string, 0,
551 kGoogleBaseSuggestURLParameterFull, 625 kGoogleBaseSuggestURLParameterFull,
552 search_terms_data.GoogleBaseSuggestURLValue()); 626 search_terms_data.GoogleBaseSuggestURLValue());
553 627
628 search_term_key_.clear();
629 host_.clear();
630 path_.clear();
631 search_term_key_location_ = url_parse::Parsed::REF;
632
554 GURL url(url_string); 633 GURL url(url_string);
555 if (!url.is_valid()) 634 if (!url.is_valid())
556 return; 635 return;
557 636
558 std::string query_string = url.query(); 637 // We want to prioritize search terms in the ref rather than ones in the
559 if (query_string.empty()) 638 // query.
560 return; 639 if (!url.ref().empty())
640 FindSearchTermsKey(url.ref());
561 641
642 // If not found in ref string, look for them in query.
643 if (search_term_key_.empty() && !url.query().empty()) {
644 search_term_key_location_ = url_parse::Parsed::QUERY;
645 FindSearchTermsKey(url.query());
646 }
647
648 if (!search_term_key_.empty()) {
649 host_ = url.host();
650 path_ = url.path();
651 }
652 }
653
654 void TemplateURLRef::FindSearchTermsKey(const std::string& params) const {
562 url_parse::Component query, key, value; 655 url_parse::Component query, key, value;
563 query.len = static_cast<int>(query_string.size()); 656 query.len = static_cast<int>(params.size());
564 while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key, 657 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
565 &value)) { 658 &value)) {
566 if (key.is_nonempty() && value.is_nonempty()) { 659 if (key.is_nonempty() && value.is_nonempty()) {
567 std::string value_string = query_string.substr(value.begin, value.len); 660 std::string value_string = params.substr(value.begin, value.len);
568 if (value_string.find(kSearchTermsParameterFull, 0) != 661 if (value_string.find(kSearchTermsParameterFull, 0) !=
569 std::string::npos || 662 std::string::npos ||
570 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) != 663 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
571 std::string::npos) { 664 std::string::npos) {
572 search_term_key_ = query_string.substr(key.begin, key.len); 665 search_term_key_ = params.substr(key.begin, key.len);
573 host_ = url.host();
574 path_ = url.path();
575 break; 666 break;
576 } 667 }
577 } 668 }
578 } 669 }
579 } 670 }
580 671
581 672
582 // TemplateURLData ------------------------------------------------------------ 673 // TemplateURLData ------------------------------------------------------------
583 674
584 TemplateURLData::TemplateURLData() 675 TemplateURLData::TemplateURLData()
(...skipping 19 matching lines...) Expand all
604 // Case sensitive keyword matching is confusing. As such, we force all 695 // Case sensitive keyword matching is confusing. As such, we force all
605 // keywords to be lower case. 696 // keywords to be lower case.
606 keyword_ = base::i18n::ToLower(keyword); 697 keyword_ = base::i18n::ToLower(keyword);
607 } 698 }
608 699
609 void TemplateURLData::SetURL(const std::string& url) { 700 void TemplateURLData::SetURL(const std::string& url) {
610 DCHECK(!url.empty()); 701 DCHECK(!url.empty());
611 url_ = url; 702 url_ = url;
612 } 703 }
613 704
705 std::string TemplateURLData::SerializeAlternateURLs() const {
706 std::string result;
707 for (size_t i = 0; i < alternate_urls_.size(); ++i) {
708 // Sanity check that the URL doesn't contain a comma.
709 DCHECK(alternate_urls_[i].find(',') == std::string::npos);
710 if (result.length() != 0)
711 result.append(",");
712 result.append(alternate_urls_[i]);
713 }
714 return result;
715 }
716
717 void TemplateURLData::DeserializeAndSetAlternateURLs(
718 const std::string& alternate_urls) {
719 base::SplitString(alternate_urls, ',', &alternate_urls_);
720 }
614 721
615 // TemplateURL ---------------------------------------------------------------- 722 // TemplateURL ----------------------------------------------------------------
616 723
617 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data) 724 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data)
618 : profile_(profile), 725 : profile_(profile),
619 data_(data), 726 data_(data),
620 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH), 727 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH),
621 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), 728 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this),
622 TemplateURLRef::SUGGEST), 729 TemplateURLRef::SUGGEST),
623 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), 730 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this),
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
678 785
679 std::string TemplateURL::GetExtensionId() const { 786 std::string TemplateURL::GetExtensionId() const {
680 DCHECK(IsExtensionKeyword()); 787 DCHECK(IsExtensionKeyword());
681 return GURL(data_.url()).host(); 788 return GURL(data_.url()).host();
682 } 789 }
683 790
684 bool TemplateURL::IsExtensionKeyword() const { 791 bool TemplateURL::IsExtensionKeyword() const {
685 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme); 792 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme);
686 } 793 }
687 794
795 size_t TemplateURL::URLCount() const {
796 DCHECK(!url().empty());
797
798 int count = 1; // At least one for url().
799 if (!instant_url().empty())
800 count++;
801 count += data_.alternate_urls().size();
802 return count;
803 }
804
805 const std::string& TemplateURL::GetURL(size_t index) const {
806 DCHECK(!url().empty());
807 DCHECK(index >= 0);
808
809 if (index < data_.alternate_urls().size())
810 return data_.alternate_urls()[index];
811
812 index -= data_.alternate_urls().size();
813 if (!instant_url().empty()) {
814 if (index == 0)
815 return instant_url();
816 index--;
817 }
818 DCHECK(index == 0);
819 return url();
820 }
821
822 string16 TemplateURL::ExtractSearchTermsFromURL(const GURL& url) {
823 for (size_t i = 0; i < URLCount(); ++i) {
824 TemplateURLRef ref(this, i);
825 string16 result(ref.ExtractSearchTermsFromURL(url));
826 if (!result.empty())
827 return result;
828 }
829 return string16();
830 }
831
688 void TemplateURL::CopyFrom(const TemplateURL& other) { 832 void TemplateURL::CopyFrom(const TemplateURL& other) {
689 if (this == &other) 833 if (this == &other)
690 return; 834 return;
691 835
692 profile_ = other.profile_; 836 profile_ = other.profile_;
693 data_ = other.data_; 837 data_ = other.data_;
694 url_ref_.InvalidateCachedValues(); 838 url_ref_.InvalidateCachedValues();
695 suggestions_url_ref_.InvalidateCachedValues(); 839 suggestions_url_ref_.InvalidateCachedValues();
696 instant_url_ref_.InvalidateCachedValues(); 840 instant_url_ref_.InvalidateCachedValues();
697 SetPrepopulateId(other.data_.prepopulate_id); 841 SetPrepopulateId(other.data_.prepopulate_id);
(...skipping 13 matching lines...) Expand all
711 } 855 }
712 856
713 void TemplateURL::ResetKeywordIfNecessary(bool force) { 857 void TemplateURL::ResetKeywordIfNecessary(bool force) {
714 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { 858 if (IsGoogleSearchURLWithReplaceableKeyword() || force) {
715 DCHECK(!IsExtensionKeyword()); 859 DCHECK(!IsExtensionKeyword());
716 GURL url(TemplateURLService::GenerateSearchURL(this)); 860 GURL url(TemplateURLService::GenerateSearchURL(this));
717 if (url.is_valid()) 861 if (url.is_valid())
718 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); 862 data_.SetKeyword(TemplateURLService::GenerateKeyword(url));
719 } 863 }
720 } 864 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698