Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(652)

Side by Side Diff: chrome/browser/search_engines/template_url.cc

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Removed need for espv=1 for search term extraction. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/search_engines/template_url.h" 5 #include "chrome/browser/search_engines/template_url.h"
6 6
7 #include "base/guid.h" 7 #include "base/guid.h"
8 #include "base/i18n/case_conversion.h" 8 #include "base/i18n/case_conversion.h"
9 #include "base/i18n/icu_string_conversions.h" 9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/i18n/rtl.h" 10 #include "base/i18n/rtl.h"
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 : search_terms(search_terms), 116 : search_terms(search_terms),
117 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) { 117 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) {
118 } 118 }
119 119
120 120
121 // TemplateURLRef ------------------------------------------------------------- 121 // TemplateURLRef -------------------------------------------------------------
122 122
123 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type) 123 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
124 : owner_(owner), 124 : owner_(owner),
125 type_(type), 125 type_(type),
126 index_in_owner_(-1),
126 parsed_(false), 127 parsed_(false),
127 valid_(false), 128 valid_(false),
128 supports_replacements_(false), 129 supports_replacements_(false),
130 search_term_key_location_(url_parse::Parsed::QUERY),
129 prepopulated_(false) { 131 prepopulated_(false) {
130 DCHECK(owner_); 132 DCHECK(owner_);
133 DCHECK(type_ != INDEXED);
Peter Kasting 2012/10/02 21:47:59 Nit: DCHECK_NE(INDEXED, type);
beaudoin 2012/10/03 22:46:52 Kept type_ for uniformity. Please explain if there
Peter Kasting 2012/10/03 22:59:33 Nope, it was a typo, I intended |type_| :)
134 }
135
136 TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner)
137 : owner_(owner),
138 type_(INDEXED),
139 index_in_owner_(index_in_owner),
140 parsed_(false),
141 valid_(false),
142 supports_replacements_(false),
143 search_term_key_location_(url_parse::Parsed::QUERY),
144 prepopulated_(false) {
145 DCHECK(owner_);
146 DCHECK(index_in_owner_ >= 0L && index_in_owner_ < owner_->URLCount());
Peter Kasting 2012/10/02 21:47:59 Nit: The >= 0 check is nonsensical since size_t is
beaudoin 2012/10/03 22:46:52 Done.
131 } 147 }
132 148
133 TemplateURLRef::~TemplateURLRef() { 149 TemplateURLRef::~TemplateURLRef() {
134 } 150 }
135 151
136 std::string TemplateURLRef::GetURL() const { 152 std::string TemplateURLRef::GetURL() const {
137 switch (type_) { 153 switch (type_) {
138 case SEARCH: return owner_->url(); 154 case SEARCH: return owner_->url();
139 case SUGGEST: return owner_->suggestions_url(); 155 case SUGGEST: return owner_->suggestions_url();
140 case INSTANT: return owner_->instant_url(); 156 case INSTANT: return owner_->instant_url();
157 case INDEXED: return owner_->GetURL(index_in_owner_);
141 default: NOTREACHED(); return std::string(); // NOLINT 158 default: NOTREACHED(); return std::string(); // NOLINT
142 } 159 }
143 } 160 }
144 161
145 bool TemplateURLRef::SupportsReplacement() const { 162 bool TemplateURLRef::SupportsReplacement() const {
146 UIThreadSearchTermsData search_terms_data(owner_->profile()); 163 UIThreadSearchTermsData search_terms_data(owner_->profile());
147 return SupportsReplacementUsingTermsData(search_terms_data); 164 return SupportsReplacementUsingTermsData(search_terms_data);
148 } 165 }
149 166
150 bool TemplateURLRef::SupportsReplacementUsingTermsData( 167 bool TemplateURLRef::SupportsReplacementUsingTermsData(
(...skipping 246 matching lines...) Expand 10 before | Expand all | Expand 10 after
397 bool TemplateURLRef::HasGoogleBaseURLs() const { 414 bool TemplateURLRef::HasGoogleBaseURLs() const {
398 ParseIfNecessary(); 415 ParseIfNecessary();
399 for (size_t i = 0; i < replacements_.size(); ++i) { 416 for (size_t i = 0; i < replacements_.size(); ++i) {
400 if ((replacements_[i].type == GOOGLE_BASE_URL) || 417 if ((replacements_[i].type == GOOGLE_BASE_URL) ||
401 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) 418 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
402 return true; 419 return true;
403 } 420 }
404 return false; 421 return false;
405 } 422 }
406 423
424
Peter Kasting 2012/10/02 21:47:59 Nit: Unnecessary blank line
beaudoin 2012/10/03 22:46:52 Done.
425 bool TemplateURLRef::ExtractSearchTermsFromURL(
426 const GURL& url, string16* search_terms) const {
Peter Kasting 2012/10/02 21:47:59 Nit: One arg per line, first arg on first line: b
beaudoin 2012/10/03 22:46:52 Done.
427 DCHECK(search_terms);
428 search_terms->clear();
429
430 ParseIfNecessary();
431
432 // We need a search term in the template URL to extract something.
433 if (search_term_key_.empty())
434 return false;
435
436 // TODO(beaudoin): Support {Anything} parameter to act as a path wildcard.
437 // See crbug/139176
Peter Kasting 2012/10/02 21:47:59 Let's not plan to do this. Just remove this TODO.
beaudoin 2012/10/03 22:46:52 Done.
438
439 // Fill-in the replacements. We don't care about search terms in the pattern,
440 // so we use the empty string.
441 GURL pattern(ReplaceSearchTerms(SearchTermsArgs(string16())));
442 // Scheme, host, path and port must match.
Peter Kasting 2012/10/02 21:47:59 Is it feasible (maybe as a followup change) to sup
beaudoin 2012/10/03 22:46:52 Entered a TODO and a bug for this: crbug.com/15379
443 if (!url.SchemeIs(pattern.scheme().c_str()) ||
444 url.port() != pattern.port() ||
445 url.host() != host_ ||
446 url.path() != path_) {
447 return false;
448 }
449
450 // Parameter must be present either in the query or the ref.
451 std::string params;
Peter Kasting 2012/10/02 21:47:59 Nit: Simpler: const std::string& params(
beaudoin 2012/10/03 22:46:52 Done.
452 switch (search_term_key_location_) {
453 case url_parse::Parsed::QUERY:
454 params = url.query();
455 break;
456 case url_parse::Parsed::REF:
457 params = url.ref();
458 break;
459 default:
460 NOTREACHED();
461 return false;
462 }
463
464 url_parse::Component query, key, value;
465 query.len = static_cast<int>(params.size());
466 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
467 &value)) {
468 if (key.is_nonempty()) {
469 if (params.substr(key.begin, key.len) == search_term_key_) {
470 // Extract the search term.
471 *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
472 params.substr(value.begin, value.len),
473 net::UnescapeRule::SPACES |
474 net::UnescapeRule::URL_SPECIAL_CHARS |
475 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
476 NULL);
477 return true;
478 }
479 }
480 }
481 return false;
482 }
483
407 void TemplateURLRef::InvalidateCachedValues() const { 484 void TemplateURLRef::InvalidateCachedValues() const {
408 supports_replacements_ = valid_ = parsed_ = false; 485 supports_replacements_ = valid_ = parsed_ = false;
409 host_.clear(); 486 host_.clear();
410 path_.clear(); 487 path_.clear();
411 search_term_key_.clear(); 488 search_term_key_.clear();
412 replacements_.clear(); 489 replacements_.clear();
413 } 490 }
414 491
415 bool TemplateURLRef::ParseParameter(size_t start, 492 bool TemplateURLRef::ParseParameter(size_t start,
416 size_t end, 493 size_t end,
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
549 void TemplateURLRef::ParseHostAndSearchTermKey( 626 void TemplateURLRef::ParseHostAndSearchTermKey(
550 const SearchTermsData& search_terms_data) const { 627 const SearchTermsData& search_terms_data) const {
551 std::string url_string(GetURL()); 628 std::string url_string(GetURL());
552 ReplaceSubstringsAfterOffset(&url_string, 0, 629 ReplaceSubstringsAfterOffset(&url_string, 0,
553 kGoogleBaseURLParameterFull, 630 kGoogleBaseURLParameterFull,
554 search_terms_data.GoogleBaseURLValue()); 631 search_terms_data.GoogleBaseURLValue());
555 ReplaceSubstringsAfterOffset(&url_string, 0, 632 ReplaceSubstringsAfterOffset(&url_string, 0,
556 kGoogleBaseSuggestURLParameterFull, 633 kGoogleBaseSuggestURLParameterFull,
557 search_terms_data.GoogleBaseSuggestURLValue()); 634 search_terms_data.GoogleBaseSuggestURLValue());
558 635
636 search_term_key_.clear();
637 host_.clear();
638 path_.clear();
639 search_term_key_location_ = url_parse::Parsed::REF;
640
559 GURL url(url_string); 641 GURL url(url_string);
560 if (!url.is_valid()) 642 if (!url.is_valid())
561 return; 643 return;
562 644
563 std::string query_string = url.query(); 645 // We want to prioritize search terms in the ref rather than ones in the
564 if (query_string.empty()) 646 // query.
Peter Kasting 2012/10/02 21:47:59 Ugh, I don't like hardcoding this. We don't need
beaudoin 2012/10/03 22:46:52 Removed the comment. The goal here is just to find
565 return; 647 if (!url.ref().empty())
648 FindSearchTermsKey(url.ref());
566 649
650 // If not found in ref string, look for them in query.
651 if (search_term_key_.empty() && !url.query().empty()) {
652 search_term_key_location_ = url_parse::Parsed::QUERY;
653 FindSearchTermsKey(url.query());
654 }
655
656 if (!search_term_key_.empty()) {
657 host_ = url.host();
658 path_ = url.path();
659 }
660 }
661
662 void TemplateURLRef::FindSearchTermsKey(const std::string& params) const {
567 url_parse::Component query, key, value; 663 url_parse::Component query, key, value;
568 query.len = static_cast<int>(query_string.size()); 664 query.len = static_cast<int>(params.size());
569 while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key, 665 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
570 &value)) { 666 &value)) {
571 if (key.is_nonempty() && value.is_nonempty()) { 667 if (key.is_nonempty() && value.is_nonempty()) {
572 std::string value_string = query_string.substr(value.begin, value.len); 668 std::string value_string = params.substr(value.begin, value.len);
573 if (value_string.find(kSearchTermsParameterFull, 0) != 669 if (value_string.find(kSearchTermsParameterFull, 0) !=
574 std::string::npos || 670 std::string::npos ||
575 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) != 671 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
576 std::string::npos) { 672 std::string::npos) {
577 search_term_key_ = query_string.substr(key.begin, key.len); 673 search_term_key_ = params.substr(key.begin, key.len);
578 host_ = url.host();
579 path_ = url.path();
580 break; 674 break;
581 } 675 }
582 } 676 }
583 } 677 }
584 } 678 }
585 679
586 680
587 // TemplateURLData ------------------------------------------------------------ 681 // TemplateURLData ------------------------------------------------------------
588 682
589 TemplateURLData::TemplateURLData() 683 TemplateURLData::TemplateURLData()
(...skipping 18 matching lines...) Expand all
608 702
609 // Case sensitive keyword matching is confusing. As such, we force all 703 // Case sensitive keyword matching is confusing. As such, we force all
610 // keywords to be lower case. 704 // keywords to be lower case.
611 keyword_ = base::i18n::ToLower(keyword); 705 keyword_ = base::i18n::ToLower(keyword);
612 } 706 }
613 707
614 void TemplateURLData::SetURL(const std::string& url) { 708 void TemplateURLData::SetURL(const std::string& url) {
615 DCHECK(!url.empty()); 709 DCHECK(!url.empty());
616 url_ = url; 710 url_ = url;
617 } 711 }
618 712
Peter Kasting 2012/10/02 21:47:59 Nit: Don't remove this blank line
beaudoin 2012/10/03 22:46:52 Done.
619
620 // TemplateURL ---------------------------------------------------------------- 713 // TemplateURL ----------------------------------------------------------------
621 714
622 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data) 715 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data)
623 : profile_(profile), 716 : profile_(profile),
624 data_(data), 717 data_(data),
625 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH), 718 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH),
626 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), 719 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this),
627 TemplateURLRef::SUGGEST), 720 TemplateURLRef::SUGGEST),
628 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), 721 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this),
629 TemplateURLRef::INSTANT) { 722 TemplateURLRef::INSTANT) {
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
683 776
684 std::string TemplateURL::GetExtensionId() const { 777 std::string TemplateURL::GetExtensionId() const {
685 DCHECK(IsExtensionKeyword()); 778 DCHECK(IsExtensionKeyword());
686 return GURL(data_.url()).host(); 779 return GURL(data_.url()).host();
687 } 780 }
688 781
689 bool TemplateURL::IsExtensionKeyword() const { 782 bool TemplateURL::IsExtensionKeyword() const {
690 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme); 783 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme);
691 } 784 }
692 785
786 size_t TemplateURL::URLCount() const {
787 DCHECK(!url().empty());
Peter Kasting 2012/10/02 21:47:59 Nit: This is guaranteed at all times, you need not
beaudoin 2012/10/03 22:46:52 Done.
788 // Add 1 for the regular search URL.
789 return data_.alternate_urls.size() + 1;
790 }
791
792 const std::string& TemplateURL::GetURL(size_t index) const {
793 DCHECK(!url().empty());
794 DCHECK(index >= 0 && index < URLCount());
795
796 if (index < data_.alternate_urls.size())
Peter Kasting 2012/10/02 21:47:59 Nit: Simpler: return (index < data_.alternate_u
beaudoin 2012/10/03 22:46:52 Done.
797 return data_.alternate_urls[index];
798 return url();
799 }
800
801 bool TemplateURL::ExtractSearchTermsFromURL(
802 const GURL& url, string16* search_terms) {
803 DCHECK(search_terms);
804 search_terms->clear();
805
806 // Then try to match with every pattern.
807 for (size_t i = 0; i < URLCount(); ++i) {
808 TemplateURLRef ref(this, i);
809 if (ref.ExtractSearchTermsFromURL(url, search_terms)) {
810 // Never accept an empty string as a valid result, but exit early if
811 // one is found. This ensures 'http://google.com/?q=foo#q=' fails.
812 return !search_terms->empty();
813 }
814 }
815 return false;
816 }
817
693 void TemplateURL::CopyFrom(const TemplateURL& other) { 818 void TemplateURL::CopyFrom(const TemplateURL& other) {
694 if (this == &other) 819 if (this == &other)
695 return; 820 return;
696 821
697 profile_ = other.profile_; 822 profile_ = other.profile_;
698 data_ = other.data_; 823 data_ = other.data_;
699 url_ref_.InvalidateCachedValues(); 824 url_ref_.InvalidateCachedValues();
700 suggestions_url_ref_.InvalidateCachedValues(); 825 suggestions_url_ref_.InvalidateCachedValues();
701 instant_url_ref_.InvalidateCachedValues(); 826 instant_url_ref_.InvalidateCachedValues();
702 SetPrepopulateId(other.data_.prepopulate_id); 827 SetPrepopulateId(other.data_.prepopulate_id);
(...skipping 13 matching lines...) Expand all
716 } 841 }
717 842
718 void TemplateURL::ResetKeywordIfNecessary(bool force) { 843 void TemplateURL::ResetKeywordIfNecessary(bool force) {
719 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { 844 if (IsGoogleSearchURLWithReplaceableKeyword() || force) {
720 DCHECK(!IsExtensionKeyword()); 845 DCHECK(!IsExtensionKeyword());
721 GURL url(TemplateURLService::GenerateSearchURL(this)); 846 GURL url(TemplateURLService::GenerateSearchURL(this));
722 if (url.is_valid()) 847 if (url.is_valid())
723 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); 848 data_.SetKeyword(TemplateURLService::GenerateKeyword(url));
724 } 849 }
725 } 850 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698