Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(62)

Unified Diff: chrome/browser/search_engines/template_url.cc

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Removed need for espv=1 for search term extraction. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/search_engines/template_url.cc
diff --git a/chrome/browser/search_engines/template_url.cc b/chrome/browser/search_engines/template_url.cc
index 1612f5a408da560bf1d264b4a67941daae9efa9f..9f7d9adc5b51889df700152575df50f6c8a525c6 100644
--- a/chrome/browser/search_engines/template_url.cc
+++ b/chrome/browser/search_engines/template_url.cc
@@ -123,11 +123,27 @@ TemplateURLRef::SearchTermsArgs::SearchTermsArgs(const string16& search_terms)
TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
: owner_(owner),
type_(type),
+ index_in_owner_(-1),
parsed_(false),
valid_(false),
supports_replacements_(false),
+ search_term_key_location_(url_parse::Parsed::QUERY),
prepopulated_(false) {
DCHECK(owner_);
+ DCHECK(type_ != INDEXED);
Peter Kasting 2012/10/02 21:47:59 Nit: DCHECK_NE(INDEXED, type);
beaudoin 2012/10/03 22:46:52 Kept type_ for uniformity. Please explain if there
Peter Kasting 2012/10/03 22:59:33 Nope, it was a typo, I intended |type_| :)
+}
+
+TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner)
+ : owner_(owner),
+ type_(INDEXED),
+ index_in_owner_(index_in_owner),
+ parsed_(false),
+ valid_(false),
+ supports_replacements_(false),
+ search_term_key_location_(url_parse::Parsed::QUERY),
+ prepopulated_(false) {
+ DCHECK(owner_);
+ DCHECK(index_in_owner_ >= 0L && index_in_owner_ < owner_->URLCount());
Peter Kasting 2012/10/02 21:47:59 Nit: The >= 0 check is nonsensical since size_t is
beaudoin 2012/10/03 22:46:52 Done.
}
TemplateURLRef::~TemplateURLRef() {
@@ -138,6 +154,7 @@ std::string TemplateURLRef::GetURL() const {
case SEARCH: return owner_->url();
case SUGGEST: return owner_->suggestions_url();
case INSTANT: return owner_->instant_url();
+ case INDEXED: return owner_->GetURL(index_in_owner_);
default: NOTREACHED(); return std::string(); // NOLINT
}
}
@@ -404,6 +421,66 @@ bool TemplateURLRef::HasGoogleBaseURLs() const {
return false;
}
+
Peter Kasting 2012/10/02 21:47:59 Nit: Unnecessary blank line
beaudoin 2012/10/03 22:46:52 Done.
+bool TemplateURLRef::ExtractSearchTermsFromURL(
+ const GURL& url, string16* search_terms) const {
Peter Kasting 2012/10/02 21:47:59 Nit: One arg per line, first arg on first line: b
beaudoin 2012/10/03 22:46:52 Done.
+ DCHECK(search_terms);
+ search_terms->clear();
+
+ ParseIfNecessary();
+
+ // We need a search term in the template URL to extract something.
+ if (search_term_key_.empty())
+ return false;
+
+ // TODO(beaudoin): Support {Anything} parameter to act as a path wildcard.
+ // See crbug/139176
Peter Kasting 2012/10/02 21:47:59 Let's not plan to do this. Just remove this TODO.
beaudoin 2012/10/03 22:46:52 Done.
+
+ // Fill-in the replacements. We don't care about search terms in the pattern,
+ // so we use the empty string.
+ GURL pattern(ReplaceSearchTerms(SearchTermsArgs(string16())));
+ // Scheme, host, path and port must match.
Peter Kasting 2012/10/02 21:47:59 Is it feasible (maybe as a followup change) to sup
beaudoin 2012/10/03 22:46:52 Entered a TODO and a bug for this: crbug.com/15379
+ if (!url.SchemeIs(pattern.scheme().c_str()) ||
+ url.port() != pattern.port() ||
+ url.host() != host_ ||
+ url.path() != path_) {
+ return false;
+ }
+
+ // Parameter must be present either in the query or the ref.
+ std::string params;
Peter Kasting 2012/10/02 21:47:59 Nit: Simpler: const std::string& params(
beaudoin 2012/10/03 22:46:52 Done.
+ switch (search_term_key_location_) {
+ case url_parse::Parsed::QUERY:
+ params = url.query();
+ break;
+ case url_parse::Parsed::REF:
+ params = url.ref();
+ break;
+ default:
+ NOTREACHED();
+ return false;
+ }
+
+ url_parse::Component query, key, value;
+ query.len = static_cast<int>(params.size());
+ while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
+ &value)) {
+ if (key.is_nonempty()) {
+ if (params.substr(key.begin, key.len) == search_term_key_) {
+ // Extract the search term.
+ *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
+ params.substr(value.begin, value.len),
+ net::UnescapeRule::SPACES |
+ net::UnescapeRule::URL_SPECIAL_CHARS |
+ net::UnescapeRule::REPLACE_PLUS_WITH_SPACE,
+ NULL);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
void TemplateURLRef::InvalidateCachedValues() const {
supports_replacements_ = valid_ = parsed_ = false;
host_.clear();
@@ -556,27 +633,44 @@ void TemplateURLRef::ParseHostAndSearchTermKey(
kGoogleBaseSuggestURLParameterFull,
search_terms_data.GoogleBaseSuggestURLValue());
+ search_term_key_.clear();
+ host_.clear();
+ path_.clear();
+ search_term_key_location_ = url_parse::Parsed::REF;
+
GURL url(url_string);
if (!url.is_valid())
return;
- std::string query_string = url.query();
- if (query_string.empty())
- return;
+ // We want to prioritize search terms in the ref rather than ones in the
+ // query.
Peter Kasting 2012/10/02 21:47:59 Ugh, I don't like hardcoding this. We don't need
beaudoin 2012/10/03 22:46:52 Removed the comment. The goal here is just to find
+ if (!url.ref().empty())
+ FindSearchTermsKey(url.ref());
+
+ // If not found in ref string, look for them in query.
+ if (search_term_key_.empty() && !url.query().empty()) {
+ search_term_key_location_ = url_parse::Parsed::QUERY;
+ FindSearchTermsKey(url.query());
+ }
+
+ if (!search_term_key_.empty()) {
+ host_ = url.host();
+ path_ = url.path();
+ }
+}
+void TemplateURLRef::FindSearchTermsKey(const std::string& params) const {
url_parse::Component query, key, value;
- query.len = static_cast<int>(query_string.size());
- while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key,
+ query.len = static_cast<int>(params.size());
+ while (url_parse::ExtractQueryKeyValue(params.c_str(), &query, &key,
&value)) {
if (key.is_nonempty() && value.is_nonempty()) {
- std::string value_string = query_string.substr(value.begin, value.len);
+ std::string value_string = params.substr(value.begin, value.len);
if (value_string.find(kSearchTermsParameterFull, 0) !=
std::string::npos ||
value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
std::string::npos) {
- search_term_key_ = query_string.substr(key.begin, key.len);
- host_ = url.host();
- path_ = url.path();
+ search_term_key_ = params.substr(key.begin, key.len);
break;
}
}
@@ -616,7 +710,6 @@ void TemplateURLData::SetURL(const std::string& url) {
url_ = url;
}
Peter Kasting 2012/10/02 21:47:59 Nit: Don't remove this blank line
beaudoin 2012/10/03 22:46:52 Done.
-
// TemplateURL ----------------------------------------------------------------
TemplateURL::TemplateURL(Profile* profile, const TemplateURLData& data)
@@ -690,6 +783,38 @@ bool TemplateURL::IsExtensionKeyword() const {
return GURL(data_.url()).SchemeIs(chrome::kExtensionScheme);
}
+size_t TemplateURL::URLCount() const {
+ DCHECK(!url().empty());
Peter Kasting 2012/10/02 21:47:59 Nit: This is guaranteed at all times, you need not
beaudoin 2012/10/03 22:46:52 Done.
+ // Add 1 for the regular search URL.
+ return data_.alternate_urls.size() + 1;
+}
+
+const std::string& TemplateURL::GetURL(size_t index) const {
+ DCHECK(!url().empty());
+ DCHECK(index >= 0 && index < URLCount());
+
+ if (index < data_.alternate_urls.size())
Peter Kasting 2012/10/02 21:47:59 Nit: Simpler: return (index < data_.alternate_u
beaudoin 2012/10/03 22:46:52 Done.
+ return data_.alternate_urls[index];
+ return url();
+}
+
+bool TemplateURL::ExtractSearchTermsFromURL(
+ const GURL& url, string16* search_terms) {
+ DCHECK(search_terms);
+ search_terms->clear();
+
+ // Then try to match with every pattern.
+ for (size_t i = 0; i < URLCount(); ++i) {
+ TemplateURLRef ref(this, i);
+ if (ref.ExtractSearchTermsFromURL(url, search_terms)) {
+ // Never accept an empty string as a valid result, but exit early if
+ // one is found. This ensures 'http://google.com/?q=foo#q=' fails.
+ return !search_terms->empty();
+ }
+ }
+ return false;
+}
+
void TemplateURL::CopyFrom(const TemplateURL& other) {
if (this == &other)
return;

Powered by Google App Engine
This is Rietveld 408576698