Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(137)

Side by Side Diff: components/search_engines/template_url.cc

Issue 2877983002: Use search_term_value_prefix_ & search_term_value_suffix_ to specify the location of {searchTerms} … (Closed)
Patch Set: Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/search_engines/template_url.h" 5 #include "components/search_engines/template_url.h"
6 6
7 #include <string> 7 #include <string>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/command_line.h" 10 #include "base/command_line.h"
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 return true; 91 return true;
92 std::string encoded_original_query; 92 std::string encoded_original_query;
93 if (!base::UTF16ToCodepage(original_query, encoding, 93 if (!base::UTF16ToCodepage(original_query, encoding,
94 base::OnStringConversionError::SKIP, &encoded_original_query)) 94 base::OnStringConversionError::SKIP, &encoded_original_query))
95 return false; 95 return false;
96 *escaped_original_query = base::UTF8ToUTF16( 96 *escaped_original_query = base::UTF8ToUTF16(
97 net::EscapeQueryParamValue(encoded_original_query, true)); 97 net::EscapeQueryParamValue(encoded_original_query, true));
98 return true; 98 return true;
99 } 99 }
100 100
101 // Returns true if the search term placeholder is present, and also produces 101 // Finds the position of the search terms' parameter in the URL component.
102 // the constant prefix/suffix found. 102 class SearchTermLocation {
103 bool TryMatchSearchParam(base::StringPiece text, 103 public:
104 base::StringPiece pattern, 104 SearchTermLocation(const base::StringPiece& url_component,
105 std::string* prefix, 105 url::Parsed::ComponentType url_component_type)
106 std::string* suffix) { 106 : found_(false) {
107 auto pos = text.find(pattern); 107 if (url_component_type == url::Parsed::PATH) {
108 if (pos == base::StringPiece::npos) 108 // GURL's constructor escapes "{" and "}" in the path of a passed string.
109 return false; 109 found_ =
110 text.substr(0, pos).CopyToString(prefix); 110 TryMatchSearchParam(url_component, kSearchTermsParameterFullEscaped);
111 text.substr(pos + pattern.length()).CopyToString(suffix); 111 } else {
112 return true; 112 DCHECK((url_component_type == url::Parsed::QUERY) ||
113 } 113 (url_component_type == url::Parsed::REF));
114 114 url::Component query, key, value;
115 // Extract query key and host given a list of parameters coming from the URL 115 query.len = static_cast<int>(url_component.size());
116 // query or ref. 116 while (url::ExtractQueryKeyValue(url_component.data(), &query, &key,
117 struct SearchTermsKeyResult { 117 &value)) {
118 std::string key; 118 if (key.is_nonempty() && value.is_nonempty()) {
119 std::string value_prefix; 119 const base::StringPiece value_string =
120 std::string value_suffix; 120 url_component.substr(value.begin, value.len);
121 bool found() const { return !key.empty(); } 121 if (TryMatchSearchParam(value_string, kSearchTermsParameterFull) ||
122 }; 122 TryMatchSearchParam(value_string,
123 SearchTermsKeyResult FindSearchTermsKey(const std::string& params) { 123 kGoogleUnescapedSearchTermsParameterFull)) {
124 SearchTermsKeyResult result; 124 found_ = true;
125 if (params.empty()) 125 url_component.substr(key.begin, key.len).CopyToString(&key_);
126 return result; 126 break;
127 url::Component query, key, value; 127 }
128 query.len = static_cast<int>(params.size()); 128 }
129 while (url::ExtractQueryKeyValue(params.c_str(), &query, &key, &value)) {
130 if (key.is_nonempty() && value.is_nonempty()) {
131 const base::StringPiece value_string(params.c_str() + value.begin,
132 value.len);
133 if (TryMatchSearchParam(value_string, kSearchTermsParameterFull,
134 &result.value_prefix, &result.value_suffix) ||
135 TryMatchSearchParam(value_string,
136 kGoogleUnescapedSearchTermsParameterFull,
137 &result.value_prefix, &result.value_suffix)) {
138 result.key = params.substr(key.begin, key.len);
139 break;
140 } 129 }
141 } 130 }
142 } 131 }
143 return result;
144 }
145 132
146 // Extract the position of the search terms' parameter in the URL path. 133 bool found() const { return found_; }
147 bool FindSearchTermsInPath(const std::string& path, 134 const std::string& key() const { return key_; }
148 url::Component* parameter_position) { 135 const std::string& value_prefix() const { return value_prefix_; }
149 DCHECK(parameter_position); 136 const std::string& value_suffix() const { return value_suffix_; }
150 parameter_position->reset(); 137
151 const size_t begin = path.find(kSearchTermsParameterFullEscaped); 138 private:
152 if (begin == std::string::npos) 139 // Returns true if the search term placeholder is present, and also assigns
153 return false; 140 // the constant prefix/suffix found.
154 parameter_position->begin = begin; 141 bool TryMatchSearchParam(const base::StringPiece& value,
155 parameter_position->len = arraysize(kSearchTermsParameterFullEscaped) - 1; 142 const base::StringPiece& pattern) {
156 return true; 143 size_t pos = value.find(pattern);
157 } 144 if (pos == base::StringPiece::npos)
145 return false;
146 value.substr(0, pos).CopyToString(&value_prefix_);
147 value.substr(pos + pattern.length()).CopyToString(&value_suffix_);
148 return true;
149 }
150
151 bool found_;
152 std::string key_;
153 std::string value_prefix_;
154 std::string value_suffix_;
155 };
Peter Kasting 2017/05/12 21:17:46 Nit: DISALLOW_COPY_AND_ASSIGN
Vitaly Baranov 2017/05/15 07:55:34 Done.
158 156
159 bool IsTemplateParameterString(const std::string& param) { 157 bool IsTemplateParameterString(const std::string& param) {
160 return (param.length() > 2) && (*(param.begin()) == kStartParameter) && 158 return (param.length() > 2) && (*(param.begin()) == kStartParameter) &&
161 (*(param.rbegin()) == kEndParameter); 159 (*(param.rbegin()) == kEndParameter);
162 } 160 }
163 161
164 } // namespace 162 } // namespace
165 163
166 // TemplateURLRef::SearchTermsArgs -------------------------------------------- 164 // TemplateURLRef::SearchTermsArgs --------------------------------------------
167 165
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
205 203
206 // TemplateURLRef ------------------------------------------------------------- 204 // TemplateURLRef -------------------------------------------------------------
207 205
208 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, Type type) 206 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, Type type)
209 : owner_(owner), 207 : owner_(owner),
210 type_(type), 208 type_(type),
211 index_in_owner_(0), 209 index_in_owner_(0),
212 parsed_(false), 210 parsed_(false),
213 valid_(false), 211 valid_(false),
214 supports_replacements_(false), 212 supports_replacements_(false),
215 search_term_position_in_path_(std::string::npos),
216 search_term_key_location_(url::Parsed::QUERY), 213 search_term_key_location_(url::Parsed::QUERY),
217 prepopulated_(false) { 214 prepopulated_(false) {
218 DCHECK(owner_); 215 DCHECK(owner_);
219 DCHECK_NE(INDEXED, type_); 216 DCHECK_NE(INDEXED, type_);
220 } 217 }
221 218
222 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, size_t index_in_owner) 219 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, size_t index_in_owner)
223 : owner_(owner), 220 : owner_(owner),
224 type_(INDEXED), 221 type_(INDEXED),
225 index_in_owner_(index_in_owner), 222 index_in_owner_(index_in_owner),
226 parsed_(false), 223 parsed_(false),
227 valid_(false), 224 valid_(false),
228 supports_replacements_(false), 225 supports_replacements_(false),
229 search_term_position_in_path_(std::string::npos),
230 search_term_key_location_(url::Parsed::QUERY), 226 search_term_key_location_(url::Parsed::QUERY),
231 prepopulated_(false) { 227 prepopulated_(false) {
232 DCHECK(owner_); 228 DCHECK(owner_);
233 DCHECK_LT(index_in_owner_, owner_->alternate_urls().size()); 229 DCHECK_LT(index_in_owner_, owner_->alternate_urls().size());
234 } 230 }
235 231
236 TemplateURLRef::~TemplateURLRef() { 232 TemplateURLRef::~TemplateURLRef() {
237 } 233 }
238 234
239 TemplateURLRef::TemplateURLRef(const TemplateURLRef& source) = default; 235 TemplateURLRef::TemplateURLRef(const TemplateURLRef& source) = default;
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
386 ParseIfNecessary(search_terms_data); 382 ParseIfNecessary(search_terms_data);
387 return path_; 383 return path_;
388 } 384 }
389 385
390 const std::string& TemplateURLRef::GetSearchTermKey( 386 const std::string& TemplateURLRef::GetSearchTermKey(
391 const SearchTermsData& search_terms_data) const { 387 const SearchTermsData& search_terms_data) const {
392 ParseIfNecessary(search_terms_data); 388 ParseIfNecessary(search_terms_data);
393 return search_term_key_; 389 return search_term_key_;
394 } 390 }
395 391
396 size_t TemplateURLRef::GetSearchTermPositionInPath(
397 const SearchTermsData& search_terms_data) const {
398 ParseIfNecessary(search_terms_data);
399 return search_term_position_in_path_;
400 }
401
402 url::Parsed::ComponentType TemplateURLRef::GetSearchTermKeyLocation( 392 url::Parsed::ComponentType TemplateURLRef::GetSearchTermKeyLocation(
403 const SearchTermsData& search_terms_data) const { 393 const SearchTermsData& search_terms_data) const {
404 ParseIfNecessary(search_terms_data); 394 ParseIfNecessary(search_terms_data);
405 return search_term_key_location_; 395 return search_term_key_location_;
406 } 396 }
407 397
398 const std::string& TemplateURLRef::GetSearchTermValuePrefix(
399 const SearchTermsData& search_terms_data) const {
400 ParseIfNecessary(search_terms_data);
401 return search_term_value_prefix_;
402 }
403
404 const std::string& TemplateURLRef::GetSearchTermValueSuffix(
405 const SearchTermsData& search_terms_data) const {
406 ParseIfNecessary(search_terms_data);
407 return search_term_value_suffix_;
408 }
409
408 base::string16 TemplateURLRef::SearchTermToString16( 410 base::string16 TemplateURLRef::SearchTermToString16(
409 const std::string& term) const { 411 const base::StringPiece& term) const {
410 const std::vector<std::string>& encodings = owner_->input_encodings(); 412 const std::vector<std::string>& encodings = owner_->input_encodings();
411 base::string16 result; 413 base::string16 result;
412 414
413 net::UnescapeRule::Type unescape_rules = 415 net::UnescapeRule::Type unescape_rules =
414 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 416 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
415 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS; 417 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS;
416 if (search_term_key_location_ != url::Parsed::PATH) 418 if (search_term_key_location_ != url::Parsed::PATH)
417 unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; 419 unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
418 420
419 std::string unescaped = net::UnescapeURLComponent(term, unescape_rules); 421 std::string unescaped = net::UnescapeURLComponent(term, unescape_rules);
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 return false; 467 return false;
466 468
467 // Host, port, and path must match. 469 // Host, port, and path must match.
468 if ((url.host() != host_) || 470 if ((url.host() != host_) ||
469 (url.port() != port_) || 471 (url.port() != port_) ||
470 ((url.path() != path_) && 472 ((url.path() != path_) &&
471 (search_term_key_location_ != url::Parsed::PATH))) { 473 (search_term_key_location_ != url::Parsed::PATH))) {
472 return false; 474 return false;
473 } 475 }
474 476
475 std::string source; 477 base::StringPiece source;
476 url::Component position; 478 url::Component position;
477 479
478 if (search_term_key_location_ == url::Parsed::PATH) { 480 if (search_term_key_location_ == url::Parsed::PATH) {
479 source = url.path(); 481 source = url.path_piece();
480 482
481 // Characters in the path before and after search terms must match. 483 // If the path does not contain the expected prefix and suffix, then this is
482 if (source.length() < path_.length()) 484 // not a match.
485 if (source.size() < (search_term_value_prefix_.size() +
486 search_term_value_suffix_.size()) ||
487 !source.starts_with(search_term_value_prefix_) ||
488 !source.ends_with(search_term_value_suffix_))
483 return false; 489 return false;
484 position.begin = search_term_position_in_path_; 490 position =
485 position.len = source.length() - path_.length(); 491 url::MakeRange(search_term_value_prefix_.size(),
486 if (source.substr(0, position.begin) + source.substr(position.end()) != 492 source.length() - search_term_value_suffix_.size());
487 path_)
488 return false;
489 } else { 493 } else {
490 DCHECK(search_term_key_location_ == url::Parsed::QUERY || 494 DCHECK(search_term_key_location_ == url::Parsed::QUERY ||
491 search_term_key_location_ == url::Parsed::REF); 495 search_term_key_location_ == url::Parsed::REF);
492 source = (search_term_key_location_ == url::Parsed::QUERY) ? 496 source = (search_term_key_location_ == url::Parsed::QUERY)
493 url.query() : url.ref(); 497 ? url.query_piece()
498 : url.ref_piece();
494 499
495 url::Component query, key, value; 500 url::Component query, key, value;
496 query.len = static_cast<int>(source.size()); 501 query.len = static_cast<int>(source.size());
497 bool key_found = false; 502 bool key_found = false;
498 while (url::ExtractQueryKeyValue(source.c_str(), &query, &key, &value)) { 503 while (url::ExtractQueryKeyValue(source.data(), &query, &key, &value)) {
499 if (key.is_nonempty()) { 504 if (key.is_nonempty()) {
500 if (source.substr(key.begin, key.len) == search_term_key_) { 505 if (source.substr(key.begin, key.len) == search_term_key_) {
501 // Fail if search term key is found twice. 506 // Fail if search term key is found twice.
502 if (key_found) 507 if (key_found)
503 return false; 508 return false;
504 509
505 // If the query parameter does not contain the expected prefix and 510 // If the query parameter does not contain the expected prefix and
506 // suffix, then this is not a match. 511 // suffix, then this is not a match.
507 base::StringPiece search_term = 512 base::StringPiece search_term =
508 base::StringPiece(source).substr(value.begin, value.len); 513 base::StringPiece(source).substr(value.begin, value.len);
(...skipping 23 matching lines...) Expand all
532 *search_terms_position = position; 537 *search_terms_position = position;
533 return true; 538 return true;
534 } 539 }
535 540
536 void TemplateURLRef::InvalidateCachedValues() const { 541 void TemplateURLRef::InvalidateCachedValues() const {
537 supports_replacements_ = valid_ = parsed_ = false; 542 supports_replacements_ = valid_ = parsed_ = false;
538 host_.clear(); 543 host_.clear();
539 port_.clear(); 544 port_.clear();
540 path_.clear(); 545 path_.clear();
541 search_term_key_.clear(); 546 search_term_key_.clear();
542 search_term_position_in_path_ = std::string::npos;
543 search_term_key_location_ = url::Parsed::QUERY; 547 search_term_key_location_ = url::Parsed::QUERY;
548 search_term_value_prefix_.clear();
549 search_term_value_suffix_.clear();
544 replacements_.clear(); 550 replacements_.clear();
545 post_params_.clear(); 551 post_params_.clear();
546 } 552 }
547 553
548 bool TemplateURLRef::ParseParameter(size_t start, 554 bool TemplateURLRef::ParseParameter(size_t start,
549 size_t end, 555 size_t end,
550 std::string* url, 556 std::string* url,
551 Replacements* replacements) const { 557 Replacements* replacements) const {
552 DCHECK(start != std::string::npos && 558 DCHECK(start != std::string::npos &&
553 end != std::string::npos && end > start); 559 end != std::string::npos && end > start);
(...skipping 230 matching lines...) Expand 10 before | Expand all | Expand 10 after
784 &url_string, 0, "{google:baseURL}", 790 &url_string, 0, "{google:baseURL}",
785 search_terms_data.GoogleBaseURLValue()); 791 search_terms_data.GoogleBaseURLValue());
786 base::ReplaceSubstringsAfterOffset( 792 base::ReplaceSubstringsAfterOffset(
787 &url_string, 0, "{google:baseSuggestURL}", 793 &url_string, 0, "{google:baseSuggestURL}",
788 search_terms_data.GoogleBaseSuggestURLValue()); 794 search_terms_data.GoogleBaseSuggestURLValue());
789 795
790 GURL url(url_string); 796 GURL url(url_string);
791 if (!url.is_valid()) 797 if (!url.is_valid())
792 return; 798 return;
793 799
794 auto query_result = FindSearchTermsKey(url.query()); 800 SearchTermLocation query_result(url.query_piece(), url::Parsed::QUERY);
795 auto ref_result = FindSearchTermsKey(url.ref()); 801 SearchTermLocation ref_result(url.ref_piece(), url::Parsed::REF);
796 url::Component parameter_position; 802 SearchTermLocation path_result(url.path_piece(), url::Parsed::PATH);
797 const bool in_query = query_result.found(); 803 const bool in_query = query_result.found();
798 const bool in_ref = ref_result.found(); 804 const bool in_ref = ref_result.found();
799 const bool in_path = FindSearchTermsInPath(url.path(), &parameter_position); 805 const bool in_path = path_result.found();
800 if (in_query ? (in_ref || in_path) : (in_ref == in_path)) 806 if (in_query ? (in_ref || in_path) : (in_ref == in_path))
801 return; // No key or multiple keys found. We only handle having one key. 807 return; // No key or multiple keys found. We only handle having one key.
802 808
803 host_ = url.host(); 809 host_ = url.host();
804 port_ = url.port(); 810 port_ = url.port();
805 path_ = url.path();
806 if (in_query) { 811 if (in_query) {
807 search_term_key_ = query_result.key;
808 search_term_key_location_ = url::Parsed::QUERY; 812 search_term_key_location_ = url::Parsed::QUERY;
809 search_term_value_prefix_ = query_result.value_prefix; 813 search_term_key_ = query_result.key();
810 search_term_value_suffix_ = query_result.value_suffix; 814 search_term_value_prefix_ = query_result.value_prefix();
815 search_term_value_suffix_ = query_result.value_suffix();
816 path_ = url.path();
811 } else if (in_ref) { 817 } else if (in_ref) {
812 search_term_key_ = ref_result.key;
813 search_term_key_location_ = url::Parsed::REF; 818 search_term_key_location_ = url::Parsed::REF;
814 search_term_value_prefix_ = ref_result.value_prefix; 819 search_term_key_ = ref_result.key();
815 search_term_value_suffix_ = ref_result.value_suffix; 820 search_term_value_prefix_ = ref_result.value_prefix();
821 search_term_value_suffix_ = ref_result.value_suffix();
822 path_ = url.path();
816 } else { 823 } else {
817 DCHECK(in_path); 824 DCHECK(in_path);
818 DCHECK_GE(parameter_position.begin, 1); // Path must start with '/'.
819 search_term_key_location_ = url::Parsed::PATH; 825 search_term_key_location_ = url::Parsed::PATH;
820 search_term_position_in_path_ = parameter_position.begin; 826 search_term_value_prefix_ = path_result.value_prefix();
821 // Remove the "{searchTerms}" itself from |path_|. 827 search_term_value_suffix_ = path_result.value_suffix();
822 path_.erase(parameter_position.begin, parameter_position.len);
823 } 828 }
824 } 829 }
825 830
826 void TemplateURLRef::HandleReplacement(const std::string& name, 831 void TemplateURLRef::HandleReplacement(const std::string& name,
827 const std::string& value, 832 const std::string& value,
828 const Replacement& replacement, 833 const Replacement& replacement,
829 std::string* url) const { 834 std::string* url) const {
830 size_t pos = replacement.index; 835 size_t pos = replacement.index;
831 if (replacement.is_post_param) { 836 if (replacement.is_post_param) {
832 DCHECK_LT(pos, post_params_.size()); 837 DCHECK_LT(pos, post_params_.size());
(...skipping 689 matching lines...) Expand 10 before | Expand all | Expand 10 after
1522 // patterns. This means that given patterns 1527 // patterns. This means that given patterns
1523 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], 1528 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
1524 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would 1529 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would
1525 // return false. This is important for at least Google, where such URLs 1530 // return false. This is important for at least Google, where such URLs
1526 // are invalid. 1531 // are invalid.
1527 return !search_terms->empty(); 1532 return !search_terms->empty();
1528 } 1533 }
1529 } 1534 }
1530 return false; 1535 return false;
1531 } 1536 }
OLDNEW
« no previous file with comments | « components/search_engines/template_url.h ('k') | components/search_engines/template_url_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698