Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: chrome/browser/search_engines/template_url.cc

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebased Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/search_engines/template_url.h" 5 #include "chrome/browser/search_engines/template_url.h"
6 6
7 #include "base/guid.h" 7 #include "base/guid.h"
8 #include "base/i18n/case_conversion.h" 8 #include "base/i18n/case_conversion.h"
9 #include "base/i18n/icu_string_conversions.h" 9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/i18n/rtl.h" 10 #include "base/i18n/rtl.h"
11 #include "base/logging.h" 11 #include "base/logging.h"
12 #include "base/metrics/field_trial.h" 12 #include "base/metrics/field_trial.h"
13 #include "base/string_number_conversions.h" 13 #include "base/string_number_conversions.h"
14 #include "base/string_split.h"
14 #include "base/string_util.h" 15 #include "base/string_util.h"
15 #include "base/stringprintf.h" 16 #include "base/stringprintf.h"
16 #include "base/utf_string_conversions.h" 17 #include "base/utf_string_conversions.h"
17 #include "chrome/browser/autocomplete/autocomplete_field_trial.h" 18 #include "chrome/browser/autocomplete/autocomplete_field_trial.h"
18 #include "chrome/browser/google/google_util.h" 19 #include "chrome/browser/google/google_util.h"
19 #include "chrome/browser/search_engines/search_terms_data.h" 20 #include "chrome/browser/search_engines/search_terms_data.h"
20 #include "chrome/browser/search_engines/template_url_service.h" 21 #include "chrome/browser/search_engines/template_url_service.h"
21 #include "chrome/common/url_constants.h" 22 #include "chrome/common/url_constants.h"
22 #include "net/base/escape.h" 23 #include "net/base/escape.h"
23 #include "ui/base/l10n/l10n_util.h" 24 #include "ui/base/l10n/l10n_util.h"
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
114 : search_terms(search_terms), 115 : search_terms(search_terms),
115 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) { 116 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) {
116 } 117 }
117 118
118 119
119 // TemplateURLRef ------------------------------------------------------------- 120 // TemplateURLRef -------------------------------------------------------------
120 121
121 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type) 122 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
122 : owner_(owner), 123 : owner_(owner),
123 type_(type), 124 type_(type),
125 index_in_owner_(-1),
124 parsed_(false), 126 parsed_(false),
125 valid_(false), 127 valid_(false),
126 supports_replacements_(false), 128 supports_replacements_(false),
129 search_term_key_location_(url_parse::Parsed::QUERY),
127 prepopulated_(false) { 130 prepopulated_(false) {
128 DCHECK(owner_); 131 DCHECK(owner_);
132 DCHECK(type_ != INDEXED);
133 }
134
135 TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner)
136 : owner_(owner),
137 type_(INDEXED),
138 index_in_owner_(index_in_owner),
139 parsed_(false),
140 valid_(false),
141 supports_replacements_(false),
142 search_term_key_location_(url_parse::Parsed::QUERY),
143 prepopulated_(false) {
144 DCHECK(owner_);
145 DCHECK(index_in_owner_ >= 0L && index_in_owner_ < owner_->URLCount());
129 } 146 }
130 147
131 TemplateURLRef::~TemplateURLRef() { 148 TemplateURLRef::~TemplateURLRef() {
132 } 149 }
133 150
134 std::string TemplateURLRef::GetURL() const { 151 std::string TemplateURLRef::GetURL() const {
135 switch (type_) { 152 switch (type_) {
136 case SEARCH: return owner_->url(); 153 case SEARCH: return owner_->url();
137 case SUGGEST: return owner_->suggestions_url(); 154 case SUGGEST: return owner_->suggestions_url();
138 case INSTANT: return owner_->instant_url(); 155 case INSTANT: return owner_->instant_url();
139 default: NOTREACHED(); return std::string(); 156 case INDEXED: return owner_->GetURL(index_in_owner_);
157 default:
158 NOTREACHED();
159 return std::string();
140 } 160 }
141 } 161 }
142 162
143 bool TemplateURLRef::SupportsReplacement() const { 163 bool TemplateURLRef::SupportsReplacement() const {
144 UIThreadSearchTermsData search_terms_data(owner_->profile()); 164 UIThreadSearchTermsData search_terms_data(owner_->profile());
145 return SupportsReplacementUsingTermsData(search_terms_data); 165 return SupportsReplacementUsingTermsData(search_terms_data);
146 } 166 }
147 167
148 bool TemplateURLRef::SupportsReplacementUsingTermsData( 168 bool TemplateURLRef::SupportsReplacementUsingTermsData(
149 const SearchTermsData& search_terms_data) const { 169 const SearchTermsData& search_terms_data) const {
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after
395 bool TemplateURLRef::HasGoogleBaseURLs() const { 415 bool TemplateURLRef::HasGoogleBaseURLs() const {
396 ParseIfNecessary(); 416 ParseIfNecessary();
397 for (size_t i = 0; i < replacements_.size(); ++i) { 417 for (size_t i = 0; i < replacements_.size(); ++i) {
398 if ((replacements_[i].type == GOOGLE_BASE_URL) || 418 if ((replacements_[i].type == GOOGLE_BASE_URL) ||
399 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) 419 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
400 return true; 420 return true;
401 } 421 }
402 return false; 422 return false;
403 } 423 }
404 424
425 string16 TemplateURLRef::ExtractSearchTermsFromURL(const GURL& url) const {
426 ParseIfNecessary();
427
428 // We need a search term in the template URL to extract something.
429 if (search_term_key_.empty())
430 return string16();
431
432 // TODO(beaudoin): Support {Anything} parameter to act as a path wildcard.
433 // See crbug/139176
434
435 // Fill-in the replacements. We don't care about search terms in the pattern,
436 // so we use the empty string.
437 GURL pattern(ReplaceSearchTerms(SearchTermsArgs(string16())));
438 // Scheme, host, path and port must match.
439 if (!url.SchemeIs(pattern.scheme().c_str()) ||
440 url.port() != pattern.port() ||
441 url.host() != host_ ||
442 url.path() != path_)
dominich 2012/09/12 22:23:12 nit: add braces here
beaudoin 2012/09/13 18:16:45 Done.
443 return string16();
444
445 // Parameter must be present either in the query or the ref.
446 std::string params;
447 switch (search_term_key_location_) {
448 case url_parse::Parsed::QUERY:
449 params = url.query();
450 break;
451 case url_parse::Parsed::REF:
452 params = url.ref();
453 break;
454 default:
455 NOTREACHED();
456 return string16();
457 }
458
459 url_parse::Component query, key, value;
460 query.len = static_cast<int>(params.size());
461 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
462 &value)) {
463 if (key.is_nonempty() && value.is_nonempty()) {
464 if (params.substr(key.begin, key.len) == search_term_key_) {
465 // Extract the search term.
466 return net::UnescapeAndDecodeUTF8URLComponent(
467 params.substr(value.begin, value.len),
468 net::UnescapeRule::SPACES |
469 net::UnescapeRule::URL_SPECIAL_CHARS |
470 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
471 NULL);
472 }
473 }
474 }
475 return string16();
476 }
477
405 void TemplateURLRef::InvalidateCachedValues() const { 478 void TemplateURLRef::InvalidateCachedValues() const {
406 supports_replacements_ = valid_ = parsed_ = false; 479 supports_replacements_ = valid_ = parsed_ = false;
407 host_.clear(); 480 host_.clear();
408 path_.clear(); 481 path_.clear();
409 search_term_key_.clear(); 482 search_term_key_.clear();
410 replacements_.clear(); 483 replacements_.clear();
411 } 484 }
412 485
413 bool TemplateURLRef::ParseParameter(size_t start, 486 bool TemplateURLRef::ParseParameter(size_t start,
414 size_t end, 487 size_t end,
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after
544 void TemplateURLRef::ParseHostAndSearchTermKey( 617 void TemplateURLRef::ParseHostAndSearchTermKey(
545 const SearchTermsData& search_terms_data) const { 618 const SearchTermsData& search_terms_data) const {
546 std::string url_string(GetURL()); 619 std::string url_string(GetURL());
547 ReplaceSubstringsAfterOffset(&url_string, 0, 620 ReplaceSubstringsAfterOffset(&url_string, 0,
548 kGoogleBaseURLParameterFull, 621 kGoogleBaseURLParameterFull,
549 search_terms_data.GoogleBaseURLValue()); 622 search_terms_data.GoogleBaseURLValue());
550 ReplaceSubstringsAfterOffset(&url_string, 0, 623 ReplaceSubstringsAfterOffset(&url_string, 0,
551 kGoogleBaseSuggestURLParameterFull, 624 kGoogleBaseSuggestURLParameterFull,
552 search_terms_data.GoogleBaseSuggestURLValue()); 625 search_terms_data.GoogleBaseSuggestURLValue());
553 626
627 search_term_key_.clear();
628 host_.clear();
629 path_.clear();
630 search_term_key_location_ = url_parse::Parsed::REF;
631
554 GURL url(url_string); 632 GURL url(url_string);
555 if (!url.is_valid()) 633 if (!url.is_valid())
556 return; 634 return;
557 635
558 std::string query_string = url.query(); 636 // We want to prioritize search terms in the ref rather than ones in the
559 if (query_string.empty()) 637 // query.
560 return; 638 if (!url.ref().empty())
639 FindSearchTermsKey(url.ref());
561 640
641 // If not found in ref string, look for them in query.
642 if (search_term_key_.empty() && !url.query().empty()) {
643 search_term_key_location_ = url_parse::Parsed::QUERY;
644 FindSearchTermsKey(url.query());
645 }
646
647 if (!search_term_key_.empty()) {
648 host_ = url.host();
649 path_ = url.path();
650 }
651 }
652
653 void TemplateURLRef::FindSearchTermsKey(const std::string& params) const {
562 url_parse::Component query, key, value; 654 url_parse::Component query, key, value;
563 query.len = static_cast<int>(query_string.size()); 655 query.len = static_cast<int>(params.size());
564 while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key, 656 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
565 &value)) { 657 &value)) {
566 if (key.is_nonempty() && value.is_nonempty()) { 658 if (key.is_nonempty() && value.is_nonempty()) {
567 std::string value_string = query_string.substr(value.begin, value.len); 659 std::string value_string = params.substr(value.begin, value.len);
568 if (value_string.find(kSearchTermsParameterFull, 0) != 660 if (value_string.find(kSearchTermsParameterFull, 0) !=
569 std::string::npos || 661 std::string::npos ||
570 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) != 662 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
571 std::string::npos) { 663 std::string::npos) {
572 search_term_key_ = query_string.substr(key.begin, key.len); 664 search_term_key_ = params.substr(key.begin, key.len);
573 host_ = url.host();
574 path_ = url.path();
575 break; 665 break;
576 } 666 }
577 } 667 }
578 } 668 }
579 } 669 }
580 670
581 671
582 // TemplateURLData ------------------------------------------------------------ 672 // TemplateURLData ------------------------------------------------------------
583 673
584 TemplateURLData::TemplateURLData() 674 TemplateURLData::TemplateURLData()
(...skipping 19 matching lines...) Expand all
604 // Case sensitive keyword matching is confusing. As such, we force all 694 // Case sensitive keyword matching is confusing. As such, we force all
605 // keywords to be lower case. 695 // keywords to be lower case.
606 keyword_ = base::i18n::ToLower(keyword); 696 keyword_ = base::i18n::ToLower(keyword);
607 } 697 }
608 698
609 void TemplateURLData::SetURL(const std::string& url) { 699 void TemplateURLData::SetURL(const std::string& url) {
610 DCHECK(!url.empty()); 700 DCHECK(!url.empty());
611 url_ = url; 701 url_ = url;
612 } 702 }
613 703
704 std::string TemplateURLData::SerializeAlternateURLs() const {
705 std::string result;
706 for (size_t i = 0; i < alternate_urls_.size(); ++i) {
707 // Sanity check that the URL doesn't contain a comma.
708 DCHECK(alternate_urls_[i].find(',') == std::string::npos);
709 if (result.length() != 0)
710 result.append(",");
711 result.append(alternate_urls_[i]);
712 }
713 return result;
714 }
715
716 void TemplateURLData::DeserializeAndSetAlternateURLs(
717 const std::string& alternate_urls) {
718 base::SplitString(alternate_urls, ',', &alternate_urls_);
719 }
614 720
615 // TemplateURL ---------------------------------------------------------------- 721 // TemplateURL ----------------------------------------------------------------
616 722
617 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data) 723 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data)
618 : profile_(profile), 724 : profile_(profile),
619 data_(data), 725 data_(data),
620 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH), 726 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH),
621 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), 727 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this),
622 TemplateURLRef::SUGGEST), 728 TemplateURLRef::SUGGEST),
623 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), 729 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this),
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
678 784
679 std::string TemplateURL::GetExtensionId() const { 785 std::string TemplateURL::GetExtensionId() const {
680 DCHECK(IsExtensionKeyword()); 786 DCHECK(IsExtensionKeyword());
681 return GURL(data_.url()).host(); 787 return GURL(data_.url()).host();
682 } 788 }
683 789
684 bool TemplateURL::IsExtensionKeyword() const { 790 bool TemplateURL::IsExtensionKeyword() const {
685 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme); 791 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme);
686 } 792 }
687 793
794 size_t TemplateURL::URLCount() const {
795 DCHECK(!url().empty());
796
797 int count = 1; // At least one for url().
798 if (!instant_url().empty())
799 count++;
800 count += data_.alternate_urls().size();
801 return count;
802 }
803
804 const std::string& TemplateURL::GetURL(size_t index) const {
805 DCHECK(!url().empty());
806 DCHECK(index >= 0);
807
808 if (index < data_.alternate_urls().size())
809 return data_.alternate_urls()[index];
810
811 index -= data_.alternate_urls().size();
812 if (!instant_url().empty()) {
813 if (index == 0)
814 return instant_url();
815 index--;
816 }
817 DCHECK(index == 0);
818 return url();
819 }
820
821 string16 TemplateURL::ExtractSearchTermsFromURL(const GURL& url) {
822 for (size_t i = 0; i < URLCount(); ++i) {
823 TemplateURLRef ref(this, i);
824 string16 result(ref.ExtractSearchTermsFromURL(url));
825 if (!result.empty())
826 return result;
827 }
828 return string16();
829 }
830
688 void TemplateURL::CopyFrom(const TemplateURL& other) { 831 void TemplateURL::CopyFrom(const TemplateURL& other) {
689 if (this == &other) 832 if (this == &other)
690 return; 833 return;
691 834
692 profile_ = other.profile_; 835 profile_ = other.profile_;
693 data_ = other.data_; 836 data_ = other.data_;
694 url_ref_.InvalidateCachedValues(); 837 url_ref_.InvalidateCachedValues();
695 suggestions_url_ref_.InvalidateCachedValues(); 838 suggestions_url_ref_.InvalidateCachedValues();
696 instant_url_ref_.InvalidateCachedValues(); 839 instant_url_ref_.InvalidateCachedValues();
697 SetPrepopulateId(other.data_.prepopulate_id); 840 SetPrepopulateId(other.data_.prepopulate_id);
(...skipping 13 matching lines...) Expand all
711 } 854 }
712 855
713 void TemplateURL::ResetKeywordIfNecessary(bool force) { 856 void TemplateURL::ResetKeywordIfNecessary(bool force) {
714 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { 857 if (IsGoogleSearchURLWithReplaceableKeyword() || force) {
715 DCHECK(!IsExtensionKeyword()); 858 DCHECK(!IsExtensionKeyword());
716 GURL url(TemplateURLService::GenerateSearchURL(this)); 859 GURL url(TemplateURLService::GenerateSearchURL(this));
717 if (url.is_valid()) 860 if (url.is_valid())
718 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); 861 data_.SetKeyword(TemplateURLService::GenerateKeyword(url));
719 } 862 }
720 } 863 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698