Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(997)

Side by Side Diff: components/search_engines/template_url.cc

Issue 2877983002: Use search_term_value_prefix_ & search_term_value_suffix_ to specify the location of {searchTerms} … (Closed)
Patch Set: Fix unit-test Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/search_engines/template_url.h" 5 #include "components/search_engines/template_url.h"
6 6
7 #include <string> 7 #include <string>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/command_line.h" 10 #include "base/command_line.h"
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 return true; 91 return true;
92 std::string encoded_original_query; 92 std::string encoded_original_query;
93 if (!base::UTF16ToCodepage(original_query, encoding, 93 if (!base::UTF16ToCodepage(original_query, encoding,
94 base::OnStringConversionError::SKIP, &encoded_original_query)) 94 base::OnStringConversionError::SKIP, &encoded_original_query))
95 return false; 95 return false;
96 *escaped_original_query = base::UTF8ToUTF16( 96 *escaped_original_query = base::UTF8ToUTF16(
97 net::EscapeQueryParamValue(encoded_original_query, true)); 97 net::EscapeQueryParamValue(encoded_original_query, true));
98 return true; 98 return true;
99 } 99 }
100 100
101 // Returns true if the search term placeholder is present, and also produces 101 // Finds the position of the search terms' parameter in the URL component.
102 // the constant prefix/suffix found. 102 class SearchTermLocation {
103 bool TryMatchSearchParam(base::StringPiece text, 103 public:
104 base::StringPiece pattern, 104 SearchTermLocation(const base::StringPiece& url_component,
105 std::string* prefix, 105 url::Parsed::ComponentType url_component_type)
106 std::string* suffix) { 106 : found_(false) {
107 auto pos = text.find(pattern); 107 if (url_component_type == url::Parsed::PATH) {
108 if (pos == base::StringPiece::npos) 108 // GURL's constructor escapes "{" and "}" in the path of a passed string.
109 return false; 109 found_ =
110 text.substr(0, pos).CopyToString(prefix); 110 TryMatchSearchParam(url_component, kSearchTermsParameterFullEscaped);
111 text.substr(pos + pattern.length()).CopyToString(suffix); 111 } else {
112 return true; 112 DCHECK((url_component_type == url::Parsed::QUERY) ||
113 } 113 (url_component_type == url::Parsed::REF));
114 114 url::Component query, key, value;
115 // Extract query key and host given a list of parameters coming from the URL 115 query.len = static_cast<int>(url_component.size());
116 // query or ref. 116 while (url::ExtractQueryKeyValue(url_component.data(), &query, &key,
117 struct SearchTermsKeyResult { 117 &value)) {
118 std::string key; 118 if (key.is_nonempty() && value.is_nonempty()) {
119 std::string value_prefix; 119 const base::StringPiece value_string =
120 std::string value_suffix; 120 url_component.substr(value.begin, value.len);
121 bool found() const { return !key.empty(); } 121 if (TryMatchSearchParam(value_string, kSearchTermsParameterFull) ||
122 }; 122 TryMatchSearchParam(value_string,
123 SearchTermsKeyResult FindSearchTermsKey(const std::string& params) { 123 kGoogleUnescapedSearchTermsParameterFull)) {
124 SearchTermsKeyResult result; 124 found_ = true;
125 if (params.empty()) 125 url_component.substr(key.begin, key.len).CopyToString(&key_);
126 return result; 126 break;
127 url::Component query, key, value; 127 }
128 query.len = static_cast<int>(params.size()); 128 }
129 while (url::ExtractQueryKeyValue(params.c_str(), &query, &key, &value)) {
130 if (key.is_nonempty() && value.is_nonempty()) {
131 const base::StringPiece value_string(params.c_str() + value.begin,
132 value.len);
133 if (TryMatchSearchParam(value_string, kSearchTermsParameterFull,
134 &result.value_prefix, &result.value_suffix) ||
135 TryMatchSearchParam(value_string,
136 kGoogleUnescapedSearchTermsParameterFull,
137 &result.value_prefix, &result.value_suffix)) {
138 result.key = params.substr(key.begin, key.len);
139 break;
140 } 129 }
141 } 130 }
142 } 131 }
143 return result;
144 }
145 132
146 // Extract the position of the search terms' parameter in the URL path. 133 bool found() const { return found_; }
147 bool FindSearchTermsInPath(const std::string& path, 134 const std::string& key() const { return key_; }
148 url::Component* parameter_position) { 135 const std::string& value_prefix() const { return value_prefix_; }
149 DCHECK(parameter_position); 136 const std::string& value_suffix() const { return value_suffix_; }
150 parameter_position->reset(); 137
151 const size_t begin = path.find(kSearchTermsParameterFullEscaped); 138 private:
152 if (begin == std::string::npos) 139 // Returns true if the search term placeholder is present, and also assigns
153 return false; 140 // the constant prefix/suffix found.
154 parameter_position->begin = begin; 141 bool TryMatchSearchParam(const base::StringPiece& value,
155 parameter_position->len = arraysize(kSearchTermsParameterFullEscaped) - 1; 142 const base::StringPiece& pattern) {
156 return true; 143 size_t pos = value.find(pattern);
157 } 144 if (pos == base::StringPiece::npos)
145 return false;
146 value.substr(0, pos).CopyToString(&value_prefix_);
147 value.substr(pos + pattern.length()).CopyToString(&value_suffix_);
148 return true;
149 }
150
151 bool found_;
152 std::string key_;
153 std::string value_prefix_;
154 std::string value_suffix_;
155
156 DISALLOW_COPY_AND_ASSIGN(SearchTermLocation);
157 };
158 158
159 bool IsTemplateParameterString(const std::string& param) { 159 bool IsTemplateParameterString(const std::string& param) {
160 return (param.length() > 2) && (*(param.begin()) == kStartParameter) && 160 return (param.length() > 2) && (*(param.begin()) == kStartParameter) &&
161 (*(param.rbegin()) == kEndParameter); 161 (*(param.rbegin()) == kEndParameter);
162 } 162 }
163 163
164 } // namespace 164 } // namespace
165 165
166 // TemplateURLRef::SearchTermsArgs -------------------------------------------- 166 // TemplateURLRef::SearchTermsArgs --------------------------------------------
167 167
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
205 205
206 // TemplateURLRef ------------------------------------------------------------- 206 // TemplateURLRef -------------------------------------------------------------
207 207
208 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, Type type) 208 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, Type type)
209 : owner_(owner), 209 : owner_(owner),
210 type_(type), 210 type_(type),
211 index_in_owner_(0), 211 index_in_owner_(0),
212 parsed_(false), 212 parsed_(false),
213 valid_(false), 213 valid_(false),
214 supports_replacements_(false), 214 supports_replacements_(false),
215 search_term_position_in_path_(std::string::npos),
216 search_term_key_location_(url::Parsed::QUERY), 215 search_term_key_location_(url::Parsed::QUERY),
217 prepopulated_(false) { 216 prepopulated_(false) {
218 DCHECK(owner_); 217 DCHECK(owner_);
219 DCHECK_NE(INDEXED, type_); 218 DCHECK_NE(INDEXED, type_);
220 } 219 }
221 220
222 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, size_t index_in_owner) 221 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, size_t index_in_owner)
223 : owner_(owner), 222 : owner_(owner),
224 type_(INDEXED), 223 type_(INDEXED),
225 index_in_owner_(index_in_owner), 224 index_in_owner_(index_in_owner),
226 parsed_(false), 225 parsed_(false),
227 valid_(false), 226 valid_(false),
228 supports_replacements_(false), 227 supports_replacements_(false),
229 search_term_position_in_path_(std::string::npos),
230 search_term_key_location_(url::Parsed::QUERY), 228 search_term_key_location_(url::Parsed::QUERY),
231 prepopulated_(false) { 229 prepopulated_(false) {
232 DCHECK(owner_); 230 DCHECK(owner_);
233 DCHECK_LT(index_in_owner_, owner_->alternate_urls().size()); 231 DCHECK_LT(index_in_owner_, owner_->alternate_urls().size());
234 } 232 }
235 233
236 TemplateURLRef::~TemplateURLRef() { 234 TemplateURLRef::~TemplateURLRef() {
237 } 235 }
238 236
239 TemplateURLRef::TemplateURLRef(const TemplateURLRef& source) = default; 237 TemplateURLRef::TemplateURLRef(const TemplateURLRef& source) = default;
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
386 ParseIfNecessary(search_terms_data); 384 ParseIfNecessary(search_terms_data);
387 return path_; 385 return path_;
388 } 386 }
389 387
390 const std::string& TemplateURLRef::GetSearchTermKey( 388 const std::string& TemplateURLRef::GetSearchTermKey(
391 const SearchTermsData& search_terms_data) const { 389 const SearchTermsData& search_terms_data) const {
392 ParseIfNecessary(search_terms_data); 390 ParseIfNecessary(search_terms_data);
393 return search_term_key_; 391 return search_term_key_;
394 } 392 }
395 393
396 size_t TemplateURLRef::GetSearchTermPositionInPath(
397 const SearchTermsData& search_terms_data) const {
398 ParseIfNecessary(search_terms_data);
399 return search_term_position_in_path_;
400 }
401
402 url::Parsed::ComponentType TemplateURLRef::GetSearchTermKeyLocation( 394 url::Parsed::ComponentType TemplateURLRef::GetSearchTermKeyLocation(
403 const SearchTermsData& search_terms_data) const { 395 const SearchTermsData& search_terms_data) const {
404 ParseIfNecessary(search_terms_data); 396 ParseIfNecessary(search_terms_data);
405 return search_term_key_location_; 397 return search_term_key_location_;
406 } 398 }
407 399
400 const std::string& TemplateURLRef::GetSearchTermValuePrefix(
401 const SearchTermsData& search_terms_data) const {
402 ParseIfNecessary(search_terms_data);
403 return search_term_value_prefix_;
404 }
405
406 const std::string& TemplateURLRef::GetSearchTermValueSuffix(
407 const SearchTermsData& search_terms_data) const {
408 ParseIfNecessary(search_terms_data);
409 return search_term_value_suffix_;
410 }
411
408 base::string16 TemplateURLRef::SearchTermToString16( 412 base::string16 TemplateURLRef::SearchTermToString16(
409 const std::string& term) const { 413 const base::StringPiece& term) const {
410 const std::vector<std::string>& encodings = owner_->input_encodings(); 414 const std::vector<std::string>& encodings = owner_->input_encodings();
411 base::string16 result; 415 base::string16 result;
412 416
413 net::UnescapeRule::Type unescape_rules = 417 net::UnescapeRule::Type unescape_rules =
414 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 418 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
415 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS; 419 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS;
416 if (search_term_key_location_ != url::Parsed::PATH) 420 if (search_term_key_location_ != url::Parsed::PATH)
417 unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; 421 unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
418 422
419 std::string unescaped = net::UnescapeURLComponent(term, unescape_rules); 423 std::string unescaped = net::UnescapeURLComponent(term, unescape_rules);
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 return false; 469 return false;
466 470
467 // Host, port, and path must match. 471 // Host, port, and path must match.
468 if ((url.host() != host_) || 472 if ((url.host() != host_) ||
469 (url.port() != port_) || 473 (url.port() != port_) ||
470 ((url.path() != path_) && 474 ((url.path() != path_) &&
471 (search_term_key_location_ != url::Parsed::PATH))) { 475 (search_term_key_location_ != url::Parsed::PATH))) {
472 return false; 476 return false;
473 } 477 }
474 478
475 std::string source; 479 base::StringPiece source;
476 url::Component position; 480 url::Component position;
477 481
478 if (search_term_key_location_ == url::Parsed::PATH) { 482 if (search_term_key_location_ == url::Parsed::PATH) {
479 source = url.path(); 483 source = url.path_piece();
480 484
481 // Characters in the path before and after search terms must match. 485 // If the path does not contain the expected prefix and suffix, then this is
482 if (source.length() < path_.length()) 486 // not a match.
487 if (source.size() < (search_term_value_prefix_.size() +
488 search_term_value_suffix_.size()) ||
489 !source.starts_with(search_term_value_prefix_) ||
490 !source.ends_with(search_term_value_suffix_))
483 return false; 491 return false;
484 position.begin = search_term_position_in_path_; 492 position =
485 position.len = source.length() - path_.length(); 493 url::MakeRange(search_term_value_prefix_.size(),
486 if (source.substr(0, position.begin) + source.substr(position.end()) != 494 source.length() - search_term_value_suffix_.size());
487 path_)
488 return false;
489 } else { 495 } else {
490 DCHECK(search_term_key_location_ == url::Parsed::QUERY || 496 DCHECK(search_term_key_location_ == url::Parsed::QUERY ||
491 search_term_key_location_ == url::Parsed::REF); 497 search_term_key_location_ == url::Parsed::REF);
492 source = (search_term_key_location_ == url::Parsed::QUERY) ? 498 source = (search_term_key_location_ == url::Parsed::QUERY)
493 url.query() : url.ref(); 499 ? url.query_piece()
500 : url.ref_piece();
494 501
495 url::Component query, key, value; 502 url::Component query, key, value;
496 query.len = static_cast<int>(source.size()); 503 query.len = static_cast<int>(source.size());
497 bool key_found = false; 504 bool key_found = false;
498 while (url::ExtractQueryKeyValue(source.c_str(), &query, &key, &value)) { 505 while (url::ExtractQueryKeyValue(source.data(), &query, &key, &value)) {
499 if (key.is_nonempty()) { 506 if (key.is_nonempty()) {
500 if (source.substr(key.begin, key.len) == search_term_key_) { 507 if (source.substr(key.begin, key.len) == search_term_key_) {
501 // Fail if search term key is found twice. 508 // Fail if search term key is found twice.
502 if (key_found) 509 if (key_found)
503 return false; 510 return false;
504 511
505 // If the query parameter does not contain the expected prefix and 512 // If the query parameter does not contain the expected prefix and
506 // suffix, then this is not a match. 513 // suffix, then this is not a match.
507 base::StringPiece search_term = 514 base::StringPiece search_term =
508 base::StringPiece(source).substr(value.begin, value.len); 515 base::StringPiece(source).substr(value.begin, value.len);
(...skipping 23 matching lines...) Expand all
532 *search_terms_position = position; 539 *search_terms_position = position;
533 return true; 540 return true;
534 } 541 }
535 542
536 void TemplateURLRef::InvalidateCachedValues() const { 543 void TemplateURLRef::InvalidateCachedValues() const {
537 supports_replacements_ = valid_ = parsed_ = false; 544 supports_replacements_ = valid_ = parsed_ = false;
538 host_.clear(); 545 host_.clear();
539 port_.clear(); 546 port_.clear();
540 path_.clear(); 547 path_.clear();
541 search_term_key_.clear(); 548 search_term_key_.clear();
542 search_term_position_in_path_ = std::string::npos;
543 search_term_key_location_ = url::Parsed::QUERY; 549 search_term_key_location_ = url::Parsed::QUERY;
550 search_term_value_prefix_.clear();
551 search_term_value_suffix_.clear();
544 replacements_.clear(); 552 replacements_.clear();
545 post_params_.clear(); 553 post_params_.clear();
546 } 554 }
547 555
548 bool TemplateURLRef::ParseParameter(size_t start, 556 bool TemplateURLRef::ParseParameter(size_t start,
549 size_t end, 557 size_t end,
550 std::string* url, 558 std::string* url,
551 Replacements* replacements) const { 559 Replacements* replacements) const {
552 DCHECK(start != std::string::npos && 560 DCHECK(start != std::string::npos &&
553 end != std::string::npos && end > start); 561 end != std::string::npos && end > start);
(...skipping 230 matching lines...) Expand 10 before | Expand all | Expand 10 after
784 &url_string, 0, "{google:baseURL}", 792 &url_string, 0, "{google:baseURL}",
785 search_terms_data.GoogleBaseURLValue()); 793 search_terms_data.GoogleBaseURLValue());
786 base::ReplaceSubstringsAfterOffset( 794 base::ReplaceSubstringsAfterOffset(
787 &url_string, 0, "{google:baseSuggestURL}", 795 &url_string, 0, "{google:baseSuggestURL}",
788 search_terms_data.GoogleBaseSuggestURLValue()); 796 search_terms_data.GoogleBaseSuggestURLValue());
789 797
790 GURL url(url_string); 798 GURL url(url_string);
791 if (!url.is_valid()) 799 if (!url.is_valid())
792 return; 800 return;
793 801
794 auto query_result = FindSearchTermsKey(url.query()); 802 SearchTermLocation query_result(url.query_piece(), url::Parsed::QUERY);
795 auto ref_result = FindSearchTermsKey(url.ref()); 803 SearchTermLocation ref_result(url.ref_piece(), url::Parsed::REF);
796 url::Component parameter_position; 804 SearchTermLocation path_result(url.path_piece(), url::Parsed::PATH);
797 const bool in_query = query_result.found(); 805 const bool in_query = query_result.found();
798 const bool in_ref = ref_result.found(); 806 const bool in_ref = ref_result.found();
799 const bool in_path = FindSearchTermsInPath(url.path(), &parameter_position); 807 const bool in_path = path_result.found();
800 if (in_query ? (in_ref || in_path) : (in_ref == in_path)) 808 if (in_query ? (in_ref || in_path) : (in_ref == in_path))
801 return; // No key or multiple keys found. We only handle having one key. 809 return; // No key or multiple keys found. We only handle having one key.
802 810
803 host_ = url.host(); 811 host_ = url.host();
804 port_ = url.port(); 812 port_ = url.port();
805 path_ = url.path();
806 if (in_query) { 813 if (in_query) {
807 search_term_key_ = query_result.key;
808 search_term_key_location_ = url::Parsed::QUERY; 814 search_term_key_location_ = url::Parsed::QUERY;
809 search_term_value_prefix_ = query_result.value_prefix; 815 search_term_key_ = query_result.key();
810 search_term_value_suffix_ = query_result.value_suffix; 816 search_term_value_prefix_ = query_result.value_prefix();
817 search_term_value_suffix_ = query_result.value_suffix();
818 path_ = url.path();
811 } else if (in_ref) { 819 } else if (in_ref) {
812 search_term_key_ = ref_result.key;
813 search_term_key_location_ = url::Parsed::REF; 820 search_term_key_location_ = url::Parsed::REF;
814 search_term_value_prefix_ = ref_result.value_prefix; 821 search_term_key_ = ref_result.key();
815 search_term_value_suffix_ = ref_result.value_suffix; 822 search_term_value_prefix_ = ref_result.value_prefix();
823 search_term_value_suffix_ = ref_result.value_suffix();
824 path_ = url.path();
816 } else { 825 } else {
817 DCHECK(in_path); 826 DCHECK(in_path);
818 DCHECK_GE(parameter_position.begin, 1); // Path must start with '/'.
819 search_term_key_location_ = url::Parsed::PATH; 827 search_term_key_location_ = url::Parsed::PATH;
820 search_term_position_in_path_ = parameter_position.begin; 828 search_term_value_prefix_ = path_result.value_prefix();
821 // Remove the "{searchTerms}" itself from |path_|. 829 search_term_value_suffix_ = path_result.value_suffix();
822 path_.erase(parameter_position.begin, parameter_position.len);
823 } 830 }
824 } 831 }
825 832
826 void TemplateURLRef::HandleReplacement(const std::string& name, 833 void TemplateURLRef::HandleReplacement(const std::string& name,
827 const std::string& value, 834 const std::string& value,
828 const Replacement& replacement, 835 const Replacement& replacement,
829 std::string* url) const { 836 std::string* url) const {
830 size_t pos = replacement.index; 837 size_t pos = replacement.index;
831 if (replacement.is_post_param) { 838 if (replacement.is_post_param) {
832 DCHECK_LT(pos, post_params_.size()); 839 DCHECK_LT(pos, post_params_.size());
(...skipping 689 matching lines...) Expand 10 before | Expand all | Expand 10 after
1522 // patterns. This means that given patterns 1529 // patterns. This means that given patterns
1523 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], 1530 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
1524 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would 1531 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would
1525 // return false. This is important for at least Google, where such URLs 1532 // return false. This is important for at least Google, where such URLs
1526 // are invalid. 1533 // are invalid.
1527 return !search_terms->empty(); 1534 return !search_terms->empty();
1528 } 1535 }
1529 } 1536 }
1530 return false; 1537 return false;
1531 } 1538 }
OLDNEW
« no previous file with comments | « components/search_engines/template_url.h ('k') | components/search_engines/template_url_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698