OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/search_engines/template_url.h" | 5 #include "chrome/browser/search_engines/template_url.h" |
6 | 6 |
7 #include "base/guid.h" | 7 #include "base/guid.h" |
8 #include "base/i18n/case_conversion.h" | 8 #include "base/i18n/case_conversion.h" |
9 #include "base/i18n/icu_string_conversions.h" | 9 #include "base/i18n/icu_string_conversions.h" |
10 #include "base/i18n/rtl.h" | 10 #include "base/i18n/rtl.h" |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "base/metrics/field_trial.h" | 12 #include "base/metrics/field_trial.h" |
13 #include "base/string_number_conversions.h" | 13 #include "base/string_number_conversions.h" |
| 14 #include "base/string_split.h" |
14 #include "base/string_util.h" | 15 #include "base/string_util.h" |
15 #include "base/stringprintf.h" | 16 #include "base/stringprintf.h" |
16 #include "base/utf_string_conversions.h" | 17 #include "base/utf_string_conversions.h" |
17 #include "chrome/browser/autocomplete/autocomplete_field_trial.h" | 18 #include "chrome/browser/autocomplete/autocomplete_field_trial.h" |
18 #include "chrome/browser/google/google_util.h" | 19 #include "chrome/browser/google/google_util.h" |
19 #include "chrome/browser/search_engines/search_terms_data.h" | 20 #include "chrome/browser/search_engines/search_terms_data.h" |
20 #include "chrome/browser/search_engines/template_url_service.h" | 21 #include "chrome/browser/search_engines/template_url_service.h" |
21 #include "chrome/common/url_constants.h" | 22 #include "chrome/common/url_constants.h" |
22 #include "google_apis/google_api_keys.h" | 23 #include "google_apis/google_api_keys.h" |
23 #include "net/base/escape.h" | 24 #include "net/base/escape.h" |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
116 : search_terms(search_terms), | 117 : search_terms(search_terms), |
117 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) { | 118 accepted_suggestion(NO_SUGGESTIONS_AVAILABLE) { |
118 } | 119 } |
119 | 120 |
120 | 121 |
121 // TemplateURLRef ------------------------------------------------------------- | 122 // TemplateURLRef ------------------------------------------------------------- |
122 | 123 |
// Constructs a reference to one of the fixed URL slots of |owner|, selected by
// |type|. INDEXED references must be created with the (owner, index)
// constructor instead; this is enforced by the DCHECK below.
TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
    : owner_(owner),
      type_(type),
      // In the code visible here GetURL() only reads |index_in_owner_| for
      // INDEXED references, so -1 acts as a placeholder.
      index_in_owner_(-1),
      parsed_(false),
      valid_(false),
      supports_replacements_(false),
      search_term_key_location_(url_parse::Parsed::QUERY),
      prepopulated_(false) {
  DCHECK(owner_);
  DCHECK(type_ != INDEXED);
}
| 136 |
| 137 TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner) |
| 138 : owner_(owner), |
| 139 type_(INDEXED), |
| 140 index_in_owner_(index_in_owner), |
| 141 parsed_(false), |
| 142 valid_(false), |
| 143 supports_replacements_(false), |
| 144 search_term_key_location_(url_parse::Parsed::QUERY), |
| 145 prepopulated_(false) { |
| 146 DCHECK(owner_); |
| 147 DCHECK(index_in_owner_ >= 0L && index_in_owner_ < owner_->URLCount()); |
131 } | 148 } |
132 | 149 |
133 TemplateURLRef::~TemplateURLRef() { | 150 TemplateURLRef::~TemplateURLRef() { |
134 } | 151 } |
135 | 152 |
136 std::string TemplateURLRef::GetURL() const { | 153 std::string TemplateURLRef::GetURL() const { |
137 switch (type_) { | 154 switch (type_) { |
138 case SEARCH: return owner_->url(); | 155 case SEARCH: return owner_->url(); |
139 case SUGGEST: return owner_->suggestions_url(); | 156 case SUGGEST: return owner_->suggestions_url(); |
140 case INSTANT: return owner_->instant_url(); | 157 case INSTANT: return owner_->instant_url(); |
| 158 case INDEXED: return owner_->GetURL(index_in_owner_); |
141 default: NOTREACHED(); return std::string(); // NOLINT | 159 default: NOTREACHED(); return std::string(); // NOLINT |
142 } | 160 } |
143 } | 161 } |
144 | 162 |
145 bool TemplateURLRef::SupportsReplacement() const { | 163 bool TemplateURLRef::SupportsReplacement() const { |
146 UIThreadSearchTermsData search_terms_data(owner_->profile()); | 164 UIThreadSearchTermsData search_terms_data(owner_->profile()); |
147 return SupportsReplacementUsingTermsData(search_terms_data); | 165 return SupportsReplacementUsingTermsData(search_terms_data); |
148 } | 166 } |
149 | 167 |
150 bool TemplateURLRef::SupportsReplacementUsingTermsData( | 168 bool TemplateURLRef::SupportsReplacementUsingTermsData( |
(...skipping 246 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
397 bool TemplateURLRef::HasGoogleBaseURLs() const { | 415 bool TemplateURLRef::HasGoogleBaseURLs() const { |
398 ParseIfNecessary(); | 416 ParseIfNecessary(); |
399 for (size_t i = 0; i < replacements_.size(); ++i) { | 417 for (size_t i = 0; i < replacements_.size(); ++i) { |
400 if ((replacements_[i].type == GOOGLE_BASE_URL) || | 418 if ((replacements_[i].type == GOOGLE_BASE_URL) || |
401 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) | 419 (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL)) |
402 return true; | 420 return true; |
403 } | 421 } |
404 return false; | 422 return false; |
405 } | 423 } |
406 | 424 |
| 425 |
| 426 bool TemplateURLRef::ExtractSearchTermsFromURL( |
| 427 const GURL& url, string16* search_terms) const { |
| 428 DCHECK(search_terms); |
| 429 search_terms->clear(); |
| 430 |
| 431 ParseIfNecessary(); |
| 432 |
| 433 // We need a search term in the template URL to extract something. |
| 434 if (search_term_key_.empty()) |
| 435 return false; |
| 436 |
| 437 // TODO(beaudoin): Support {Anything} parameter to act as a path wildcard. |
| 438 // See crbug/139176 |
| 439 |
| 440 // Fill-in the replacements. We don't care about search terms in the pattern, |
| 441 // so we use the empty string. |
| 442 GURL pattern(ReplaceSearchTerms(SearchTermsArgs(string16()))); |
| 443 // Scheme, host, path and port must match. |
| 444 if (!url.SchemeIs(pattern.scheme().c_str()) || |
| 445 url.port() != pattern.port() || |
| 446 url.host() != host_ || |
| 447 url.path() != path_) { |
| 448 return false; |
| 449 } |
| 450 |
| 451 // Parameter must be present either in the query or the ref. |
| 452 std::string params; |
| 453 switch (search_term_key_location_) { |
| 454 case url_parse::Parsed::QUERY: |
| 455 params = url.query(); |
| 456 break; |
| 457 case url_parse::Parsed::REF: |
| 458 params = url.ref(); |
| 459 break; |
| 460 default: |
| 461 NOTREACHED(); |
| 462 return false; |
| 463 } |
| 464 |
| 465 url_parse::Component query, key, value; |
| 466 query.len = static_cast<int>(params.size()); |
| 467 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key, |
| 468 &value)) { |
| 469 if (key.is_nonempty()) { |
| 470 if (params.substr(key.begin, key.len) == search_term_key_) { |
| 471 // Extract the search term. |
| 472 *search_terms = net::UnescapeAndDecodeUTF8URLComponent( |
| 473 params.substr(value.begin, value.len), |
| 474 net::UnescapeRule::SPACES | |
| 475 net::UnescapeRule::URL_SPECIAL_CHARS | |
| 476 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE, |
| 477 NULL); |
| 478 return true; |
| 479 } |
| 480 } |
| 481 } |
| 482 return false; |
| 483 } |
| 484 |
407 void TemplateURLRef::InvalidateCachedValues() const { | 485 void TemplateURLRef::InvalidateCachedValues() const { |
408 supports_replacements_ = valid_ = parsed_ = false; | 486 supports_replacements_ = valid_ = parsed_ = false; |
409 host_.clear(); | 487 host_.clear(); |
410 path_.clear(); | 488 path_.clear(); |
411 search_term_key_.clear(); | 489 search_term_key_.clear(); |
412 replacements_.clear(); | 490 replacements_.clear(); |
413 } | 491 } |
414 | 492 |
415 bool TemplateURLRef::ParseParameter(size_t start, | 493 bool TemplateURLRef::ParseParameter(size_t start, |
416 size_t end, | 494 size_t end, |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
549 void TemplateURLRef::ParseHostAndSearchTermKey( | 627 void TemplateURLRef::ParseHostAndSearchTermKey( |
550 const SearchTermsData& search_terms_data) const { | 628 const SearchTermsData& search_terms_data) const { |
551 std::string url_string(GetURL()); | 629 std::string url_string(GetURL()); |
552 ReplaceSubstringsAfterOffset(&url_string, 0, | 630 ReplaceSubstringsAfterOffset(&url_string, 0, |
553 kGoogleBaseURLParameterFull, | 631 kGoogleBaseURLParameterFull, |
554 search_terms_data.GoogleBaseURLValue()); | 632 search_terms_data.GoogleBaseURLValue()); |
555 ReplaceSubstringsAfterOffset(&url_string, 0, | 633 ReplaceSubstringsAfterOffset(&url_string, 0, |
556 kGoogleBaseSuggestURLParameterFull, | 634 kGoogleBaseSuggestURLParameterFull, |
557 search_terms_data.GoogleBaseSuggestURLValue()); | 635 search_terms_data.GoogleBaseSuggestURLValue()); |
558 | 636 |
| 637 search_term_key_.clear(); |
| 638 host_.clear(); |
| 639 path_.clear(); |
| 640 search_term_key_location_ = url_parse::Parsed::REF; |
| 641 |
559 GURL url(url_string); | 642 GURL url(url_string); |
560 if (!url.is_valid()) | 643 if (!url.is_valid()) |
561 return; | 644 return; |
562 | 645 |
563 std::string query_string = url.query(); | 646 // We want to prioritize search terms in the ref rather than ones in the |
564 if (query_string.empty()) | 647 // query. |
565 return; | 648 if (!url.ref().empty()) |
| 649 FindSearchTermsKey(url.ref()); |
566 | 650 |
| 651 // If not found in ref string, look for them in query. |
| 652 if (search_term_key_.empty() && !url.query().empty()) { |
| 653 search_term_key_location_ = url_parse::Parsed::QUERY; |
| 654 FindSearchTermsKey(url.query()); |
| 655 } |
| 656 |
| 657 if (!search_term_key_.empty()) { |
| 658 host_ = url.host(); |
| 659 path_ = url.path(); |
| 660 } |
| 661 } |
| 662 |
| 663 void TemplateURLRef::FindSearchTermsKey(const std::string& params) const { |
567 url_parse::Component query, key, value; | 664 url_parse::Component query, key, value; |
568 query.len = static_cast<int>(query_string.size()); | 665 query.len = static_cast<int>(params.size()); |
569 while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key, | 666 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key, |
570 &value)) { | 667 &value)) { |
571 if (key.is_nonempty() && value.is_nonempty()) { | 668 if (key.is_nonempty() && value.is_nonempty()) { |
572 std::string value_string = query_string.substr(value.begin, value.len); | 669 std::string value_string = params.substr(value.begin, value.len); |
573 if (value_string.find(kSearchTermsParameterFull, 0) != | 670 if (value_string.find(kSearchTermsParameterFull, 0) != |
574 std::string::npos || | 671 std::string::npos || |
575 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) != | 672 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) != |
576 std::string::npos) { | 673 std::string::npos) { |
577 search_term_key_ = query_string.substr(key.begin, key.len); | 674 search_term_key_ = params.substr(key.begin, key.len); |
578 host_ = url.host(); | |
579 path_ = url.path(); | |
580 break; | 675 break; |
581 } | 676 } |
582 } | 677 } |
583 } | 678 } |
584 } | 679 } |
585 | 680 |
586 | 681 |
587 // TemplateURLData ------------------------------------------------------------ | 682 // TemplateURLData ------------------------------------------------------------ |
588 | 683 |
589 TemplateURLData::TemplateURLData() | 684 TemplateURLData::TemplateURLData() |
(...skipping 19 matching lines...) Expand all Loading... |
609 // Case sensitive keyword matching is confusing. As such, we force all | 704 // Case sensitive keyword matching is confusing. As such, we force all |
610 // keywords to be lower case. | 705 // keywords to be lower case. |
611 keyword_ = base::i18n::ToLower(keyword); | 706 keyword_ = base::i18n::ToLower(keyword); |
612 } | 707 } |
613 | 708 |
// Stores |url| as the main URL. |url| must not be empty (checked in debug
// builds by the DCHECK).
void TemplateURLData::SetURL(const std::string& url) {
  DCHECK(!url.empty());
  url_ = url;
}
618 | 713 |
| 714 std::string TemplateURLData::SerializeAlternateURLs() const { |
| 715 std::string result; |
| 716 for (size_t i = 0; i < alternate_urls_.size(); ++i) { |
| 717 // Sanity check that the URL doesn't contain a comma. |
| 718 DCHECK(alternate_urls_[i].find(',') == std::string::npos); |
| 719 if (result.length() != 0) |
| 720 result.append(","); |
| 721 result.append(alternate_urls_[i]); |
| 722 } |
| 723 return result; |
| 724 } |
| 725 |
| 726 void TemplateURLData::DeserializeAndSetAlternateURLs( |
| 727 const std::string& alternate_urls) { |
| 728 base::SplitString(alternate_urls, ',', &alternate_urls_); |
| 729 } |
619 | 730 |
620 // TemplateURL ---------------------------------------------------------------- | 731 // TemplateURL ---------------------------------------------------------------- |
621 | 732 |
622 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data) | 733 TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data) |
623 : profile_(profile), | 734 : profile_(profile), |
624 data_(data), | 735 data_(data), |
625 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH), | 736 url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), TemplateURLRef::SEARCH), |
626 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), | 737 suggestions_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), |
627 TemplateURLRef::SUGGEST), | 738 TemplateURLRef::SUGGEST), |
628 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), | 739 instant_url_ref_(ALLOW_THIS_IN_INITIALIZER_LIST(this), |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
683 | 794 |
684 std::string TemplateURL::GetExtensionId() const { | 795 std::string TemplateURL::GetExtensionId() const { |
685 DCHECK(IsExtensionKeyword()); | 796 DCHECK(IsExtensionKeyword()); |
686 return GURL(data_.url()).host(); | 797 return GURL(data_.url()).host(); |
687 } | 798 } |
688 | 799 |
689 bool TemplateURL::IsExtensionKeyword() const { | 800 bool TemplateURL::IsExtensionKeyword() const { |
690 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme); | 801 return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme); |
691 } | 802 } |
692 | 803 |
| 804 size_t TemplateURL::URLCount() const { |
| 805 DCHECK(!url().empty()); |
| 806 // Add 1 for the regular search URL. |
| 807 return data_.alternate_urls().size() + 1; |
| 808 } |
| 809 |
| 810 const std::string& TemplateURL::GetURL(size_t index) const { |
| 811 DCHECK(!url().empty()); |
| 812 DCHECK(index >= 0 && index < URLCount()); |
| 813 |
| 814 if (index < data_.alternate_urls().size()) |
| 815 return data_.alternate_urls()[index]; |
| 816 return url(); |
| 817 } |
| 818 |
| 819 bool TemplateURL::ExtractSearchTermsFromInstantExtendedURL( |
| 820 const GURL& url, string16* search_terms) { |
| 821 DCHECK(search_terms); |
| 822 search_terms->clear(); |
| 823 |
| 824 // Ensure this is an instant extended URL. |
| 825 std::string params = url.query(); |
| 826 url_parse::Component query, key, value; |
| 827 query.len = static_cast<int>(params.size()); |
| 828 bool is_instant_extended = false; |
| 829 while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key, |
| 830 &value)) { |
| 831 if (!params.compare(key.begin, key.len, |
| 832 google_util::kInstantExtendedAPIParam)) { |
| 833 // If the parameter key is |kInstantExtendedAPIParam| and the value is |
| 834 // not 0 this is an Instant Extended API search URL. |
| 835 int int_value = 0; |
| 836 if (value.is_nonempty()) |
| 837 base::StringToInt(params.substr(value.begin, value.len), &int_value); |
| 838 if (int_value == 0) |
| 839 return false; |
| 840 is_instant_extended = true; |
| 841 break; |
| 842 } |
| 843 } |
| 844 if (!is_instant_extended) |
| 845 return false; |
| 846 |
| 847 // Then try to match with every pattern. |
| 848 for (size_t i = 0; i < URLCount(); ++i) { |
| 849 TemplateURLRef ref(this, i); |
| 850 if (ref.ExtractSearchTermsFromURL(url, search_terms)) { |
| 851 // Never accept an empty string as a valid result, but exit early if |
| 852 // one is found. This ensures 'http://google.com/?q=foo#q=' fails. |
| 853 return !search_terms->empty(); |
| 854 } |
| 855 } |
| 856 return false; |
| 857 } |
| 858 |
693 void TemplateURL::CopyFrom(const TemplateURL& other) { | 859 void TemplateURL::CopyFrom(const TemplateURL& other) { |
694 if (this == &other) | 860 if (this == &other) |
695 return; | 861 return; |
696 | 862 |
697 profile_ = other.profile_; | 863 profile_ = other.profile_; |
698 data_ = other.data_; | 864 data_ = other.data_; |
699 url_ref_.InvalidateCachedValues(); | 865 url_ref_.InvalidateCachedValues(); |
700 suggestions_url_ref_.InvalidateCachedValues(); | 866 suggestions_url_ref_.InvalidateCachedValues(); |
701 instant_url_ref_.InvalidateCachedValues(); | 867 instant_url_ref_.InvalidateCachedValues(); |
702 SetPrepopulateId(other.data_.prepopulate_id); | 868 SetPrepopulateId(other.data_.prepopulate_id); |
(...skipping 13 matching lines...) Expand all Loading... |
716 } | 882 } |
717 | 883 |
718 void TemplateURL::ResetKeywordIfNecessary(bool force) { | 884 void TemplateURL::ResetKeywordIfNecessary(bool force) { |
719 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { | 885 if (IsGoogleSearchURLWithReplaceableKeyword() || force) { |
720 DCHECK(!IsExtensionKeyword()); | 886 DCHECK(!IsExtensionKeyword()); |
721 GURL url(TemplateURLService::GenerateSearchURL(this)); | 887 GURL url(TemplateURLService::GenerateSearchURL(this)); |
722 if (url.is_valid()) | 888 if (url.is_valid()) |
723 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); | 889 data_.SetKeyword(TemplateURLService::GenerateKeyword(url)); |
724 } | 890 } |
725 } | 891 } |
OLD | NEW |