Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5404)

Unified Diff: components/search_engines/template_url.cc

Issue 1968303002: Support inexact path matching when extracting terms from Template URL. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/search_engines/template_url.h ('k') | components/search_engines/template_url_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/search_engines/template_url.cc
diff --git a/components/search_engines/template_url.cc b/components/search_engines/template_url.cc
index 2a1cb2109533d7cead63dc5a25e9faf54a088da7..8afd241a0e66c0516e0f7b46e3b448b2d0d5aaa8 100644
--- a/components/search_engines/template_url.cc
+++ b/components/search_engines/template_url.cc
@@ -59,6 +59,11 @@ const char kDisplaySearchTerms[] = "%s";
// Display value for kGoogleUnescapedSearchTermsParameter.
const char kDisplayUnescapedSearchTerms[] = "%S";
+// The text "{google:ignorePathEnding}" at the end of a template path means
+// that the rest of a URL's path is not compared when extracting search terms.
+const char kGoogleIgnorePathEndingFullEscaped[] =
+ "%7Bgoogle:ignorePathEnding%7D";
+
// Used if the count parameter is not optional. Indicates we want 10 search
// results.
const char kDefaultCount[] = "10";
@@ -141,17 +146,71 @@ SearchTermsKeyResult FindSearchTermsKey(const std::string& params) {
return result;
}
+struct SearchTermsInPathResult {  // Value returned by FindSearchTermsInPath(): how a template path splits around its markers.
+ bool search_terms_found;  // True when the search terms parameter was found in the path and not disregarded.
+ std::string value_prefix;  // Path text before the search terms; when none are found, the whole compared part of the path.
+ std::string value_suffix;  // Path text after the search terms, stopping at the ignore-ending marker when one is found.
+ bool ignore_ending;  // True when a "{google:ignorePathEnding}" marker was found and honored.
+ SearchTermsInPathResult() : search_terms_found(false), ignore_ending(false) {}  // Starts as "nothing found"; strings default to empty.
+ bool found() const { return search_terms_found; }  // Convenience accessor for |search_terms_found|.
+};
+
// Extract the position of the search terms' parameter in the URL path.
-bool FindSearchTermsInPath(const std::string& path,
- url::Component* parameter_position) {
- DCHECK(parameter_position);
- parameter_position->reset();
- const size_t begin = path.find(kSearchTermsParameterFullEscaped);
- if (begin == std::string::npos)
- return false;
- parameter_position->begin = begin;
- parameter_position->len = arraysize(kSearchTermsParameterFullEscaped) - 1;
- return true;
+SearchTermsInPathResult FindSearchTermsInPath(const base::StringPiece& path) {  // Splits |path| around the escaped search terms and ignore-ending markers.
+ DCHECK(path.starts_with("/"));  // Callers are expected to pass a path beginning with "/".
+
+ const base::StringPiece search_terms_parameter(
+ kSearchTermsParameterFullEscaped);
+ const size_t search_terms_pos = path.find(search_terms_parameter);  // First occurrence, or npos.
+ const size_t search_terms_end = (search_terms_pos == std::string::npos) ?  // One past the marker, or npos when absent.
+ std::string::npos : (search_terms_pos + search_terms_parameter.length());
+
+ const base::StringPiece ignore_ending_parameter(
+ kGoogleIgnorePathEndingFullEscaped);
+ const size_t ignore_ending_pos = path.find(ignore_ending_parameter);  // First occurrence, or npos.
+
+ bool search_terms_found = (search_terms_pos != std::string::npos);
+ bool ignore_ending = (ignore_ending_pos != std::string::npos);
+ size_t path_end = path.length();  // End of the text kept from |path|; shortened below if the ignore marker is dropped.
+
+ if (search_terms_found && ignore_ending) {  // Both markers present: decide which one wins.
+ if (ignore_ending_pos < search_terms_pos) {
+ // "{google:ignorePathEnding}" occurs before "{searchTerms}".
+ // Ignore "{searchTerms}".
+ search_terms_found = false;
+ } else if (search_terms_end == ignore_ending_pos) {
+ // No characters occur between "{searchTerms}" and
+ // "{google:ignorePathEnding}". Ignore "{google:ignorePathEnding}".
+ ignore_ending = false;
+ path_end = ignore_ending_pos;  // Cut the marker text itself out of the suffix computed below.
+ }
+ }
+
+ base::StringPiece prefix, suffix;  // Views into |path|; copied into the result further down.
+ if (search_terms_found) {
+ prefix = path.substr(0, search_terms_pos);  // Everything before the search terms marker.
+ if (ignore_ending)
+ suffix = path.substr(search_terms_end,  // Text between the two markers.
+ ignore_ending_pos - search_terms_end);
+ else
+ suffix = path.substr(search_terms_end, path_end - search_terms_end);  // Text from the marker's end up to |path_end|.
+ } else {  // No usable search terms: the prefix carries the path and the suffix stays empty.
+ if (ignore_ending)
+ prefix = path.substr(0, ignore_ending_pos);  // Path up to the ignore-ending marker.
+ else
+ prefix = path.substr(0, path_end);  // The whole path.
+ }
+
+ SearchTermsInPathResult result;
+ result.search_terms_found = search_terms_found;
+ result.ignore_ending = ignore_ending;
+ result.value_prefix = prefix.as_string();
+ result.value_suffix = suffix.as_string();
+ DCHECK(base::StartsWith(result.value_prefix, "/",  // The prefix always keeps the path's leading "/".
+ base::CompareCase::SENSITIVE));
+ DCHECK(!result.search_terms_found || !result.ignore_ending ||  // If both markers are kept, some text must separate them.
+ !result.value_suffix.empty());
+ return result;
}
bool IsTemplateParameterString(const std::string& param) {
@@ -240,7 +299,7 @@ TemplateURLRef::TemplateURLRef(const TemplateURL* owner, Type type)
parsed_(false),
valid_(false),
supports_replacements_(false),
- search_term_position_in_path_(std::string::npos),
+ ignore_path_ending_(false),
search_term_key_location_(url::Parsed::QUERY),
prepopulated_(false) {
DCHECK(owner_);
@@ -254,7 +313,7 @@ TemplateURLRef::TemplateURLRef(const TemplateURL* owner, size_t index_in_owner)
parsed_(false),
valid_(false),
supports_replacements_(false),
- search_term_position_in_path_(std::string::npos),
+ ignore_path_ending_(false),
search_term_key_location_(url::Parsed::QUERY),
prepopulated_(false) {
DCHECK(owner_);
@@ -415,16 +474,16 @@ const std::string& TemplateURLRef::GetPath(
return path_;
}
-const std::string& TemplateURLRef::GetSearchTermKey(
+bool TemplateURLRef::GetIgnorePathEnding(  // Calls ParseIfNecessary(), then returns the cached |ignore_path_ending_| flag.
const SearchTermsData& search_terms_data) const {
ParseIfNecessary(search_terms_data);
- return search_term_key_;
+ return ignore_path_ending_;
}
-size_t TemplateURLRef::GetSearchTermPositionInPath(
+const std::string& TemplateURLRef::GetSearchTermKey(  // Calls ParseIfNecessary(), then returns the cached |search_term_key_|.
const SearchTermsData& search_terms_data) const {
ParseIfNecessary(search_terms_data);
- return search_term_position_in_path_;
+ return search_term_key_;
}
url::Parsed::ComponentType TemplateURLRef::GetSearchTermKeyLocation(
@@ -433,6 +492,18 @@ url::Parsed::ComponentType TemplateURLRef::GetSearchTermKeyLocation(
return search_term_key_location_;
}
+const std::string& TemplateURLRef::GetSearchTermValuePrefix(  // Calls ParseIfNecessary(), then returns the cached |search_term_value_prefix_|.
+ const SearchTermsData& search_terms_data) const {
+ ParseIfNecessary(search_terms_data);
+ return search_term_value_prefix_;
+}
+
+const std::string& TemplateURLRef::GetSearchTermValueSuffix(  // Calls ParseIfNecessary(), then returns the cached |search_term_value_suffix_|.
+ const SearchTermsData& search_terms_data) const {
+ ParseIfNecessary(search_terms_data);
+ return search_term_value_suffix_;
+}
+
base::string16 TemplateURLRef::SearchTermToString16(
const std::string& term) const {
const std::vector<std::string>& encodings = owner_->input_encodings();
@@ -495,8 +566,8 @@ bool TemplateURLRef::ExtractSearchTermsFromURL(
// Host, port, and path must match.
if ((url.host() != host_) ||
(url.port() != port_) ||
- ((url.path() != path_) &&
- (search_term_key_location_ != url::Parsed::PATH))) {
+ ((search_term_key_location_ != url::Parsed::PATH) &&
+ !MatchPath(url.path()))) {
return false;
}
@@ -505,14 +576,7 @@ bool TemplateURLRef::ExtractSearchTermsFromURL(
if (search_term_key_location_ == url::Parsed::PATH) {
source = url.path();
-
- // Characters in the path before and after search terms must match.
- if (source.length() < path_.length())
- return false;
- position.begin = search_term_position_in_path_;
- position.len = source.length() - path_.length();
- if (source.substr(0, position.begin) + source.substr(position.end()) !=
- path_)
+ if (!MatchPathWithSearchTerms(source, &position))
return false;
} else {
DCHECK(search_term_key_location_ == url::Parsed::QUERY ||
@@ -559,14 +623,54 @@ bool TemplateURLRef::ExtractSearchTermsFromURL(
return true;
}
+bool TemplateURLRef::MatchPath(const std::string& path) const {  // Compares |path| with |path_| for templates whose search terms are not in the path.
+ if (search_term_key_location_ == url::Parsed::PATH)
+ return false;  // Templates with search terms in the path never match here.
+ if (ignore_path_ending_)
+ return base::StartsWith(path, path_, base::CompareCase::SENSITIVE);  // Only the leading |path_| must match; any trailing text is accepted.
+ else
+ return (path == path_);  // Otherwise the whole path must be identical.
+}
+
+bool TemplateURLRef::MatchPathWithSearchTerms(  // Matches |path| against the cached prefix/suffix and reports where the search terms sit.
+ const std::string& path,
+ url::Component* search_terms_position) const {  // Out-param; written only when the function returns true.
+ if (search_term_key_location_ != url::Parsed::PATH)
+ return false;  // Only applies to templates whose search terms are in the path.
+ if (path.length() < search_term_value_prefix_.length() +  // Too short to hold prefix and suffix without overlap.
+ search_term_value_suffix_.length())
+ return false;
+ if (!base::StartsWith(path, search_term_value_prefix_,  // The text before the search terms must match exactly.
+ base::CompareCase::SENSITIVE))
+ return false;
+ const size_t search_terms_pos = search_term_value_prefix_.length();  // Search terms begin right after the prefix.
+ size_t search_terms_end = std::string::npos;
+ if (ignore_path_ending_) {
+ search_terms_end = path.find(search_term_value_suffix_,  // First occurrence of the suffix ends the terms; later text is ignored.
+ search_terms_pos);
+ if (search_terms_end == std::string::npos)
+ return false;
+ } else {
+ if (!base::EndsWith(path, search_term_value_suffix_,  // Without the ignore flag the suffix must close the path.
+ base::CompareCase::SENSITIVE))
+ return false;
+ search_terms_end = path.length() - search_term_value_suffix_.length();
+ }
+ DCHECK_NE(std::string::npos, search_terms_end);  // Both branches above either returned or set a real position.
+ *search_terms_position = url::MakeRange(search_terms_pos, search_terms_end);
+ return true;
+}
+
void TemplateURLRef::InvalidateCachedValues() const {  // Resets cached state: flags to false, strings and containers cleared, key location to QUERY.
supports_replacements_ = valid_ = parsed_ = false;  // NOTE(review): written from a const method, so these members are presumably mutable; confirm in the header.
host_.clear();
port_.clear();
path_.clear();
+ ignore_path_ending_ = false;
search_term_key_.clear();
- search_term_position_in_path_ = std::string::npos;
search_term_key_location_ = url::Parsed::QUERY;
+ search_term_value_prefix_.clear();
+ search_term_value_suffix_.clear();
replacements_.clear();
post_params_.clear();
}
@@ -819,16 +923,17 @@ void TemplateURLRef::ParseHostAndSearchTermKey(
auto query_result = FindSearchTermsKey(url.query());
auto ref_result = FindSearchTermsKey(url.ref());
- url::Component parameter_position;
+ auto path_result = FindSearchTermsInPath(url.path());
const bool in_query = query_result.found();
const bool in_ref = ref_result.found();
- const bool in_path = FindSearchTermsInPath(url.path(), &parameter_position);
+ const bool in_path = path_result.found();
if (in_query ? (in_ref || in_path) : (in_ref == in_path))
return; // No key or multiple keys found. We only handle having one key.
host_ = url.host();
port_ = url.port();
- path_ = url.path();
+ path_ = path_result.value_prefix + path_result.value_suffix;
+ ignore_path_ending_ = path_result.ignore_ending;
if (in_query) {
search_term_key_ = query_result.key;
search_term_key_location_ = url::Parsed::QUERY;
@@ -841,11 +946,9 @@ void TemplateURLRef::ParseHostAndSearchTermKey(
search_term_value_suffix_ = ref_result.value_suffix;
} else {
DCHECK(in_path);
- DCHECK_GE(parameter_position.begin, 1); // Path must start with '/'.
search_term_key_location_ = url::Parsed::PATH;
- search_term_position_in_path_ = parameter_position.begin;
- // Remove the "{searchTerms}" itself from |path_|.
- path_.erase(parameter_position.begin, parameter_position.len);
+ search_term_value_prefix_ = path_result.value_prefix;
+ search_term_value_suffix_ = path_result.value_suffix;
}
}
« no previous file with comments | « components/search_engines/template_url.h ('k') | components/search_engines/template_url_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698