Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(312)

Side by Side Diff: components/search_engines/template_url.cc

Issue 1902263002: Support fixed prefixes and suffixes when extracting terms from search template URLs. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: review Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/search_engines/template_url.h" 5 #include "components/search_engines/template_url.h"
6 6
7 #include <string> 7 #include <string>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/command_line.h" 10 #include "base/command_line.h"
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 return true; 89 return true;
90 std::string encoded_original_query; 90 std::string encoded_original_query;
91 if (!base::UTF16ToCodepage(original_query, encoding, 91 if (!base::UTF16ToCodepage(original_query, encoding,
92 base::OnStringConversionError::SKIP, &encoded_original_query)) 92 base::OnStringConversionError::SKIP, &encoded_original_query))
93 return false; 93 return false;
94 *escaped_original_query = base::UTF8ToUTF16( 94 *escaped_original_query = base::UTF8ToUTF16(
95 net::EscapeQueryParamValue(encoded_original_query, true)); 95 net::EscapeQueryParamValue(encoded_original_query, true));
96 return true; 96 return true;
97 } 97 }
98 98
99 // Returns true if |pattern| (the search term placeholder) occurs in |text|.
100 // On success the constant text before and after the first occurrence is copied into |*prefix| and |*suffix|; on failure neither is written.
101 bool TryMatchSearchParam(base::StringPiece text,  // Text to search.
102 base::StringPiece pattern,  // Placeholder to look for.
103 std::string* prefix,  // Out: text preceding the match.
104 std::string* suffix) {  // Out: text following the match.
105 auto pos = text.find(pattern);  // Only the first occurrence is considered.
106 if (pos == base::StringPiece::npos)
107 return false;  // No placeholder; the outputs are left as they were.
108 text.substr(0, pos).CopyToString(prefix);  // Everything before the match.
109 text.substr(pos + pattern.length()).CopyToString(suffix);  // Everything after the match.
110 return true;
111 }
112
99 // Finds the key of the first parameter, in a list of parameters coming from 113 // Finds the key of the first parameter, in a list of parameters coming from
100 // the URL query or ref, whose value contains a search terms placeholder. The key is empty when no parameter matches. 114 // the URL query or ref, whose value contains a search terms placeholder. The key is empty when no parameter matches.
101 std::string FindSearchTermsKey(const std::string& params) { 115 struct SearchTermsKeyResult {
116 std::string key;  // Name of the matching parameter; empty if none matched.
117 std::string value_prefix;  // Text of the matching value before the placeholder.
118 std::string value_suffix;  // Text of the matching value after the placeholder.
119 bool found() const { return !key.empty(); }  // True once a key has been recorded.
120 };
121 SearchTermsKeyResult FindSearchTermsKey(const std::string& params) {
122 SearchTermsKeyResult result;
102 if (params.empty()) 123 if (params.empty())
103 return std::string(); 124 return result;  // Nothing to scan; |result| is still empty, so found() is false.
104 url::Component query, key, value; 125 url::Component query, key, value;
105 query.len = static_cast<int>(params.size()); 126 query.len = static_cast<int>(params.size());  // Length of the range to scan; presumably |query.begin| defaults to 0 - TODO confirm.
106 while (url::ExtractQueryKeyValue(params.c_str(), &query, &key, &value)) { 127 while (url::ExtractQueryKeyValue(params.c_str(), &query, &key, &value)) {
107 if (key.is_nonempty() && value.is_nonempty()) { 128 if (key.is_nonempty() && value.is_nonempty()) {  // Skip pairs lacking a key or a value.
108 const base::StringPiece value_string(params.c_str() + value.begin, 129 const base::StringPiece value_string(params.c_str() + value.begin,
109 value.len); 130 value.len);
110 if (value_string.find(kSearchTermsParameterFull, 0) != 131 if (TryMatchSearchParam(value_string, kSearchTermsParameterFull,
111 base::StringPiece::npos || 132 &result.value_prefix, &result.value_suffix) ||
112 value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) != 133 TryMatchSearchParam(value_string,
113 base::StringPiece::npos) { 134 kGoogleUnescapedSearchTermsParameterFull,
114 return params.substr(key.begin, key.len); 135 &result.value_prefix, &result.value_suffix)) {  // The second placeholder is tried only if the first is absent.
136 result.key = params.substr(key.begin, key.len);
137 break;  // Stop at the first matching parameter.
115 } 138 }
116 } 139 }
117 } 140 }
118 return std::string(); 141 return result;  // Key is empty (found() is false) when no parameter matched.
119 } 142 }
120 143
121 // Extract the position of the search terms' parameter in the URL path. 144 // Extract the position of the search terms' parameter in the URL path.
122 bool FindSearchTermsInPath(const std::string& path, 145 bool FindSearchTermsInPath(const std::string& path,
123 url::Component* parameter_position) { 146 url::Component* parameter_position) {
124 DCHECK(parameter_position); 147 DCHECK(parameter_position);
125 parameter_position->reset(); 148 parameter_position->reset();
126 const size_t begin = path.find(kSearchTermsParameterFullEscaped); 149 const size_t begin = path.find(kSearchTermsParameterFullEscaped);
127 if (begin == std::string::npos) 150 if (begin == std::string::npos)
128 return false; 151 return false;
(...skipping 370 matching lines...) Expand 10 before | Expand all | Expand 10 after
499 522
500 url::Component query, key, value; 523 url::Component query, key, value;
501 query.len = static_cast<int>(source.size()); 524 query.len = static_cast<int>(source.size());
502 bool key_found = false; 525 bool key_found = false;
503 while (url::ExtractQueryKeyValue(source.c_str(), &query, &key, &value)) { 526 while (url::ExtractQueryKeyValue(source.c_str(), &query, &key, &value)) {
504 if (key.is_nonempty()) { 527 if (key.is_nonempty()) {
505 if (source.substr(key.begin, key.len) == search_term_key_) { 528 if (source.substr(key.begin, key.len) == search_term_key_) {
506 // Fail if search term key is found twice. 529 // Fail if search term key is found twice.
507 if (key_found) 530 if (key_found)
508 return false; 531 return false;
532
533 // If the query parameter does not contain the expected prefix and
534 // suffix, then this is not a match.
535 base::StringPiece search_term =
536 base::StringPiece(source).substr(value.begin, value.len);
537 if (!search_term.starts_with(search_term_value_prefix_) ||
538 !search_term.ends_with(search_term_value_suffix_))
539 continue;
540
509 key_found = true; 541 key_found = true;
510 position = value; 542 position =
543 url::MakeRange(value.begin + search_term_value_prefix_.size(),
544 value.end() - search_term_value_suffix_.size());
511 } 545 }
512 } 546 }
513 } 547 }
514 if (!key_found) 548 if (!key_found)
515 return false; 549 return false;
516 } 550 }
517 551
518 // Extract the search term. 552 // Extract the search term.
519 *search_terms = SearchTermToString16( 553 *search_terms =
520 source.substr(position.begin, position.len)); 554 SearchTermToString16(source.substr(position.begin, position.len));
521 if (search_terms_component) 555 if (search_terms_component)
522 *search_terms_component = search_term_key_location_; 556 *search_terms_component = search_term_key_location_;
523 if (search_terms_position) 557 if (search_terms_position)
524 *search_terms_position = position; 558 *search_terms_position = position;
525 return true; 559 return true;
526 } 560 }
527 561
528 void TemplateURLRef::InvalidateCachedValues() const { 562 void TemplateURLRef::InvalidateCachedValues() const {
529 supports_replacements_ = valid_ = parsed_ = false; 563 supports_replacements_ = valid_ = parsed_ = false;
530 host_.clear(); 564 host_.clear();
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after
776 &url_string, 0, "{google:baseURL}", 810 &url_string, 0, "{google:baseURL}",
777 search_terms_data.GoogleBaseURLValue()); 811 search_terms_data.GoogleBaseURLValue());
778 base::ReplaceSubstringsAfterOffset( 812 base::ReplaceSubstringsAfterOffset(
779 &url_string, 0, "{google:baseSuggestURL}", 813 &url_string, 0, "{google:baseSuggestURL}",
780 search_terms_data.GoogleBaseSuggestURLValue()); 814 search_terms_data.GoogleBaseSuggestURLValue());
781 815
782 GURL url(url_string); 816 GURL url(url_string);
783 if (!url.is_valid()) 817 if (!url.is_valid())
784 return; 818 return;
785 819
786 std::string query_key = FindSearchTermsKey(url.query()); 820 auto query_result = FindSearchTermsKey(url.query());
787 std::string ref_key = FindSearchTermsKey(url.ref()); 821 auto ref_result = FindSearchTermsKey(url.ref());
788 url::Component parameter_position; 822 url::Component parameter_position;
789 const bool in_query = !query_key.empty(); 823 const bool in_query = query_result.found();
790 const bool in_ref = !ref_key.empty(); 824 const bool in_ref = ref_result.found();
791 const bool in_path = FindSearchTermsInPath(url.path(), &parameter_position); 825 const bool in_path = FindSearchTermsInPath(url.path(), &parameter_position);
792 if (in_query ? (in_ref || in_path) : (in_ref == in_path)) 826 if (in_query ? (in_ref || in_path) : (in_ref == in_path))
793 return; // No key or multiple keys found. We only handle having one key. 827 return; // No key or multiple keys found. We only handle having one key.
794 828
795 host_ = url.host(); 829 host_ = url.host();
796 port_ = url.port(); 830 port_ = url.port();
797 path_ = url.path(); 831 path_ = url.path();
798 if (in_query) { 832 if (in_query) {
799 search_term_key_ = query_key; 833 search_term_key_ = query_result.key;
800 search_term_key_location_ = url::Parsed::QUERY; 834 search_term_key_location_ = url::Parsed::QUERY;
835 search_term_value_prefix_ = query_result.value_prefix;
836 search_term_value_suffix_ = query_result.value_suffix;
801 } else if (in_ref) { 837 } else if (in_ref) {
802 search_term_key_ = ref_key; 838 search_term_key_ = ref_result.key;
803 search_term_key_location_ = url::Parsed::REF; 839 search_term_key_location_ = url::Parsed::REF;
840 search_term_value_prefix_ = ref_result.value_prefix;
841 search_term_value_suffix_ = ref_result.value_suffix;
804 } else { 842 } else {
805 DCHECK(in_path); 843 DCHECK(in_path);
806 DCHECK_GE(parameter_position.begin, 1); // Path must start with '/'. 844 DCHECK_GE(parameter_position.begin, 1); // Path must start with '/'.
807 search_term_key_location_ = url::Parsed::PATH; 845 search_term_key_location_ = url::Parsed::PATH;
808 search_term_position_in_path_ = parameter_position.begin; 846 search_term_position_in_path_ = parameter_position.begin;
809 // Remove the "{searchTerms}" itself from |path_|. 847 // Remove the "{searchTerms}" itself from |path_|.
810 path_.erase(parameter_position.begin, parameter_position.len); 848 path_.erase(parameter_position.begin, parameter_position.len);
811 } 849 }
812 } 850 }
813 851
(...skipping 683 matching lines...) Expand 10 before | Expand all | Expand 10 after
1497 // patterns. This means that given patterns 1535 // patterns. This means that given patterns
1498 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], 1536 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
1499 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would 1537 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=" would
1500 // return false. This is important for at least Google, where such URLs 1538 // return false. This is important for at least Google, where such URLs
1501 // are invalid. 1539 // are invalid.
1502 return !search_terms->empty(); 1540 return !search_terms->empty();
1503 } 1541 }
1504 } 1542 }
1505 return false; 1543 return false;
1506 } 1544 }
OLDNEW
« no previous file with comments | « components/search_engines/template_url.h ('k') | components/search_engines/template_url_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698