Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(344)

Side by Side Diff: components/search_engines/template_url.cc

Issue 1978553002: Refactor extracting search terms from Template URL. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@support-prefix-path-matching-when-extracting-terms-from-template-url
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/search_engines/template_url.h" 5 #include "components/search_engines/template_url.h"
6 6
7 #include <string> 7 #include <string>
8 #include <vector> 8 #include <vector>
9 9
10 #include "base/command_line.h" 10 #include "base/command_line.h"
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 kGoogleUnescapedSearchTermsParameterFull, 134 kGoogleUnescapedSearchTermsParameterFull,
135 &result.value_prefix, &result.value_suffix)) { 135 &result.value_prefix, &result.value_suffix)) {
136 result.key = params.substr(key.begin, key.len); 136 result.key = params.substr(key.begin, key.len);
137 break; 137 break;
138 } 138 }
139 } 139 }
140 } 140 }
141 return result; 141 return result;
142 } 142 }
143 143
144 struct SearchTermsInPathResult {
145 std::string value_prefix;
146 std::string value_suffix;
147 bool search_terms_found;
148 SearchTermsInPathResult() : search_terms_found(false) {}
149 bool found() const { return search_terms_found; }
150 };
151
144 // Extract the position of the search terms' parameter in the URL path. 152 // Extract the position of the search terms' parameter in the URL path.
145 bool FindSearchTermsInPath(const std::string& path, 153 SearchTermsInPathResult FindSearchTermsInPath(const base::StringPiece& path) {
Vitaly Baranov 2016/05/12 15:24:12 Changed for consistency with https://codereview.ch
146 url::Component* parameter_position) { 154 DCHECK(path.starts_with("/"));
147 DCHECK(parameter_position); 155 SearchTermsInPathResult result;
148 parameter_position->reset(); 156 const base::StringPiece search_terms_parameter(
149 const size_t begin = path.find(kSearchTermsParameterFullEscaped); 157 kSearchTermsParameterFullEscaped);
150 if (begin == std::string::npos) 158 const size_t search_terms_pos = path.find(search_terms_parameter);
151 return false; 159 result.search_terms_found = (search_terms_pos != std::string::npos);
152 parameter_position->begin = begin; 160 if (result.search_terms_found) {
153 parameter_position->len = arraysize(kSearchTermsParameterFullEscaped) - 1; 161 path.substr(0, search_terms_pos).CopyToString(&result.value_prefix);
154 return true; 162 path.substr(search_terms_pos + search_terms_parameter.length()).
163 CopyToString(&result.value_suffix);
164 } else {
165 path.CopyToString(&result.value_prefix);
166 }
167 DCHECK(base::StartsWith(result.value_prefix, "/",
168 base::CompareCase::SENSITIVE));
169 return result;
155 } 170 }
156 171
157 bool IsTemplateParameterString(const std::string& param) { 172 bool IsTemplateParameterString(const std::string& param) {
158 return (param.length() > 2) && (*(param.begin()) == kStartParameter) && 173 return (param.length() > 2) && (*(param.begin()) == kStartParameter) &&
159 (*(param.rbegin()) == kEndParameter); 174 (*(param.rbegin()) == kEndParameter);
160 } 175 }
161 176
162 } // namespace 177 } // namespace
163 178
164 179
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
233 248
234 // TemplateURLRef ------------------------------------------------------------- 249 // TemplateURLRef -------------------------------------------------------------
235 250
236 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, Type type) 251 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, Type type)
237 : owner_(owner), 252 : owner_(owner),
238 type_(type), 253 type_(type),
239 index_in_owner_(0), 254 index_in_owner_(0),
240 parsed_(false), 255 parsed_(false),
241 valid_(false), 256 valid_(false),
242 supports_replacements_(false), 257 supports_replacements_(false),
243 search_term_position_in_path_(std::string::npos),
244 search_term_key_location_(url::Parsed::QUERY), 258 search_term_key_location_(url::Parsed::QUERY),
245 prepopulated_(false) { 259 prepopulated_(false) {
246 DCHECK(owner_); 260 DCHECK(owner_);
247 DCHECK_NE(INDEXED, type_); 261 DCHECK_NE(INDEXED, type_);
248 } 262 }
249 263
250 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, size_t index_in_owner) 264 TemplateURLRef::TemplateURLRef(const TemplateURL* owner, size_t index_in_owner)
251 : owner_(owner), 265 : owner_(owner),
252 type_(INDEXED), 266 type_(INDEXED),
253 index_in_owner_(index_in_owner), 267 index_in_owner_(index_in_owner),
254 parsed_(false), 268 parsed_(false),
255 valid_(false), 269 valid_(false),
256 supports_replacements_(false), 270 supports_replacements_(false),
257 search_term_position_in_path_(std::string::npos),
258 search_term_key_location_(url::Parsed::QUERY), 271 search_term_key_location_(url::Parsed::QUERY),
259 prepopulated_(false) { 272 prepopulated_(false) {
260 DCHECK(owner_); 273 DCHECK(owner_);
261 DCHECK_LT(index_in_owner_, owner_->alternate_urls().size()); 274 DCHECK_LT(index_in_owner_, owner_->alternate_urls().size());
262 } 275 }
263 276
264 TemplateURLRef::~TemplateURLRef() { 277 TemplateURLRef::~TemplateURLRef() {
265 } 278 }
266 279
267 TemplateURLRef::TemplateURLRef(const TemplateURLRef& source) = default; 280 TemplateURLRef::TemplateURLRef(const TemplateURLRef& source) = default;
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
414 ParseIfNecessary(search_terms_data); 427 ParseIfNecessary(search_terms_data);
415 return path_; 428 return path_;
416 } 429 }
417 430
418 const std::string& TemplateURLRef::GetSearchTermKey( 431 const std::string& TemplateURLRef::GetSearchTermKey(
419 const SearchTermsData& search_terms_data) const { 432 const SearchTermsData& search_terms_data) const {
420 ParseIfNecessary(search_terms_data); 433 ParseIfNecessary(search_terms_data);
421 return search_term_key_; 434 return search_term_key_;
422 } 435 }
423 436
424 size_t TemplateURLRef::GetSearchTermPositionInPath(
425 const SearchTermsData& search_terms_data) const {
426 ParseIfNecessary(search_terms_data);
427 return search_term_position_in_path_;
428 }
429
430 url::Parsed::ComponentType TemplateURLRef::GetSearchTermKeyLocation( 437 url::Parsed::ComponentType TemplateURLRef::GetSearchTermKeyLocation(
431 const SearchTermsData& search_terms_data) const { 438 const SearchTermsData& search_terms_data) const {
432 ParseIfNecessary(search_terms_data); 439 ParseIfNecessary(search_terms_data);
433 return search_term_key_location_; 440 return search_term_key_location_;
434 } 441 }
435 442
443 const std::string& TemplateURLRef::GetSearchTermValuePrefix(
444 const SearchTermsData& search_terms_data) const {
445 ParseIfNecessary(search_terms_data);
446 return search_term_value_prefix_;
447 }
448
449 const std::string& TemplateURLRef::GetSearchTermValueSuffix(
450 const SearchTermsData& search_terms_data) const {
451 ParseIfNecessary(search_terms_data);
452 return search_term_value_suffix_;
453 }
Vitaly Baranov 2016/05/12 15:24:12 With these two functions we can check the prefix an
454
436 base::string16 TemplateURLRef::SearchTermToString16( 455 base::string16 TemplateURLRef::SearchTermToString16(
437 const std::string& term) const { 456 const std::string& term) const {
438 const std::vector<std::string>& encodings = owner_->input_encodings(); 457 const std::vector<std::string>& encodings = owner_->input_encodings();
439 base::string16 result; 458 base::string16 result;
440 459
441 net::UnescapeRule::Type unescape_rules = 460 net::UnescapeRule::Type unescape_rules =
442 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 461 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
443 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS; 462 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS;
444 if (search_term_key_location_ != url::Parsed::PATH) 463 if (search_term_key_location_ != url::Parsed::PATH)
445 unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE; 464 unescape_rules |= net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
488 ParseIfNecessary(search_terms_data); 507 ParseIfNecessary(search_terms_data);
489 508
490 // We need a search term in the template URL to extract something. 509 // We need a search term in the template URL to extract something.
491 if (search_term_key_.empty() && 510 if (search_term_key_.empty() &&
492 (search_term_key_location_ != url::Parsed::PATH)) 511 (search_term_key_location_ != url::Parsed::PATH))
493 return false; 512 return false;
494 513
495 // Host, port, and path must match. 514 // Host, port, and path must match.
496 if ((url.host() != host_) || 515 if ((url.host() != host_) ||
497 (url.port() != port_) || 516 (url.port() != port_) ||
498 ((url.path() != path_) && 517 ((search_term_key_location_ != url::Parsed::PATH) &&
499 (search_term_key_location_ != url::Parsed::PATH))) { 518 (url.path() != path_))) {
Vitaly Baranov 2016/05/12 15:24:12 Optimization.
500 return false; 519 return false;
501 } 520 }
502 521
503 std::string source; 522 std::string source;
504 url::Component position; 523 url::Component position;
505 524
506 if (search_term_key_location_ == url::Parsed::PATH) { 525 if (search_term_key_location_ == url::Parsed::PATH) {
507 source = url.path(); 526 source = url.path();
508 527 if (!ExtractSearchTermsFromPath(source, &position))
509 // Characters in the path before and after search terms must match.
510 if (source.length() < path_.length())
511 return false;
512 position.begin = search_term_position_in_path_;
513 position.len = source.length() - path_.length();
514 if (source.substr(0, position.begin) + source.substr(position.end()) !=
515 path_)
516 return false; 528 return false;
517 } else { 529 } else {
518 DCHECK(search_term_key_location_ == url::Parsed::QUERY || 530 DCHECK(search_term_key_location_ == url::Parsed::QUERY ||
519 search_term_key_location_ == url::Parsed::REF); 531 search_term_key_location_ == url::Parsed::REF);
520 source = (search_term_key_location_ == url::Parsed::QUERY) ? 532 source = (search_term_key_location_ == url::Parsed::QUERY) ?
521 url.query() : url.ref(); 533 url.query() : url.ref();
522 534
523 url::Component query, key, value; 535 url::Component query, key, value;
524 query.len = static_cast<int>(source.size()); 536 query.len = static_cast<int>(source.size());
525 bool key_found = false; 537 bool key_found = false;
(...skipping 26 matching lines...) Expand all
552 // Extract the search term. 564 // Extract the search term.
553 *search_terms = 565 *search_terms =
554 SearchTermToString16(source.substr(position.begin, position.len)); 566 SearchTermToString16(source.substr(position.begin, position.len));
555 if (search_terms_component) 567 if (search_terms_component)
556 *search_terms_component = search_term_key_location_; 568 *search_terms_component = search_term_key_location_;
557 if (search_terms_position) 569 if (search_terms_position)
558 *search_terms_position = position; 570 *search_terms_position = position;
559 return true; 571 return true;
560 } 572 }
561 573
574 bool TemplateURLRef::ExtractSearchTermsFromPath(
575 const std::string& path,
576 url::Component* search_terms_position) const {
577 if (search_term_key_location_ != url::Parsed::PATH)
578 return false;
579 if (path.length() < search_term_value_prefix_.length() +
580 search_term_value_suffix_.length())
581 return false;
582 if (!base::StartsWith(path, search_term_value_prefix_,
583 base::CompareCase::SENSITIVE))
584 return false;
585 const size_t search_terms_pos = search_term_value_prefix_.length();
586 size_t search_terms_end = std::string::npos;
587 if (!base::EndsWith(path, search_term_value_suffix_,
588 base::CompareCase::SENSITIVE))
589 return false;
590 search_terms_end = path.length() - search_term_value_suffix_.length();
591 DCHECK_NE(std::string::npos, search_terms_end);
592 *search_terms_position = url::MakeRange(search_terms_pos, search_terms_end);
593 return true;
594 }
595
562 void TemplateURLRef::InvalidateCachedValues() const { 596 void TemplateURLRef::InvalidateCachedValues() const {
563 supports_replacements_ = valid_ = parsed_ = false; 597 supports_replacements_ = valid_ = parsed_ = false;
564 host_.clear(); 598 host_.clear();
565 port_.clear(); 599 port_.clear();
566 path_.clear(); 600 path_.clear();
567 search_term_key_.clear(); 601 search_term_key_.clear();
568 search_term_position_in_path_ = std::string::npos;
569 search_term_key_location_ = url::Parsed::QUERY; 602 search_term_key_location_ = url::Parsed::QUERY;
603 search_term_value_prefix_.clear();
604 search_term_value_suffix_.clear();
Vitaly Baranov 2016/05/12 15:24:12 Minor fix.
570 replacements_.clear(); 605 replacements_.clear();
571 post_params_.clear(); 606 post_params_.clear();
572 } 607 }
573 608
574 bool TemplateURLRef::ParseParameter(size_t start, 609 bool TemplateURLRef::ParseParameter(size_t start,
575 size_t end, 610 size_t end,
576 std::string* url, 611 std::string* url,
577 Replacements* replacements) const { 612 Replacements* replacements) const {
578 DCHECK(start != std::string::npos && 613 DCHECK(start != std::string::npos &&
579 end != std::string::npos && end > start); 614 end != std::string::npos && end > start);
(...skipping 232 matching lines...) Expand 10 before | Expand all | Expand 10 after
812 base::ReplaceSubstringsAfterOffset( 847 base::ReplaceSubstringsAfterOffset(
813 &url_string, 0, "{google:baseSuggestURL}", 848 &url_string, 0, "{google:baseSuggestURL}",
814 search_terms_data.GoogleBaseSuggestURLValue()); 849 search_terms_data.GoogleBaseSuggestURLValue());
815 850
816 GURL url(url_string); 851 GURL url(url_string);
817 if (!url.is_valid()) 852 if (!url.is_valid())
818 return; 853 return;
819 854
820 auto query_result = FindSearchTermsKey(url.query()); 855 auto query_result = FindSearchTermsKey(url.query());
821 auto ref_result = FindSearchTermsKey(url.ref()); 856 auto ref_result = FindSearchTermsKey(url.ref());
822 url::Component parameter_position; 857 auto path_result = FindSearchTermsInPath(url.path());
823 const bool in_query = query_result.found(); 858 const bool in_query = query_result.found();
824 const bool in_ref = ref_result.found(); 859 const bool in_ref = ref_result.found();
825 const bool in_path = FindSearchTermsInPath(url.path(), &parameter_position); 860 const bool in_path = path_result.found();
826 if (in_query ? (in_ref || in_path) : (in_ref == in_path)) 861 if (in_query ? (in_ref || in_path) : (in_ref == in_path))
827 return; // No key or multiple keys found. We only handle having one key. 862 return; // No key or multiple keys found. We only handle having one key.
828 863
829 host_ = url.host(); 864 host_ = url.host();
830 port_ = url.port(); 865 port_ = url.port();
831 path_ = url.path(); 866 path_ = path_result.value_prefix + path_result.value_suffix;
832 if (in_query) { 867 if (in_query) {
833 search_term_key_ = query_result.key; 868 search_term_key_ = query_result.key;
834 search_term_key_location_ = url::Parsed::QUERY; 869 search_term_key_location_ = url::Parsed::QUERY;
835 search_term_value_prefix_ = query_result.value_prefix; 870 search_term_value_prefix_ = query_result.value_prefix;
836 search_term_value_suffix_ = query_result.value_suffix; 871 search_term_value_suffix_ = query_result.value_suffix;
837 } else if (in_ref) { 872 } else if (in_ref) {
838 search_term_key_ = ref_result.key; 873 search_term_key_ = ref_result.key;
839 search_term_key_location_ = url::Parsed::REF; 874 search_term_key_location_ = url::Parsed::REF;
840 search_term_value_prefix_ = ref_result.value_prefix; 875 search_term_value_prefix_ = ref_result.value_prefix;
841 search_term_value_suffix_ = ref_result.value_suffix; 876 search_term_value_suffix_ = ref_result.value_suffix;
842 } else { 877 } else {
843 DCHECK(in_path); 878 DCHECK(in_path);
844 DCHECK_GE(parameter_position.begin, 1); // Path must start with '/'.
845 search_term_key_location_ = url::Parsed::PATH; 879 search_term_key_location_ = url::Parsed::PATH;
846 search_term_position_in_path_ = parameter_position.begin; 880 search_term_value_prefix_ = path_result.value_prefix;
847 // Remove the "{searchTerms}" itself from |path_|. 881 search_term_value_suffix_ = path_result.value_suffix;
848 path_.erase(parameter_position.begin, parameter_position.len);
849 } 882 }
850 } 883 }
851 884
852 void TemplateURLRef::HandleReplacement(const std::string& name, 885 void TemplateURLRef::HandleReplacement(const std::string& name,
853 const std::string& value, 886 const std::string& value,
854 const Replacement& replacement, 887 const Replacement& replacement,
855 std::string* url) const { 888 std::string* url) const {
856 size_t pos = replacement.index; 889 size_t pos = replacement.index;
857 if (replacement.is_post_param) { 890 if (replacement.is_post_param) {
858 DCHECK_LT(pos, post_params_.size()); 891 DCHECK_LT(pos, post_params_.size());
(...skipping 676 matching lines...) Expand 10 before | Expand all | Expand 10 after
1535 // patterns. This means that given patterns 1568 // patterns. This means that given patterns
1536 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ], 1569 // [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
1537 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would 1570 // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would
1538 // return false. This is important for at least Google, where such URLs 1571 // return false. This is important for at least Google, where such URLs
1539 // are invalid. 1572 // are invalid.
1540 return !search_terms->empty(); 1573 return !search_terms->empty();
1541 } 1574 }
1542 } 1575 }
1543 return false; 1576 return false;
1544 } 1577 }
OLDNEW
« no previous file with comments | « components/search_engines/template_url.h ('k') | components/search_engines/template_url_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698