Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5634)

Unified Diff: chrome/browser/search_engines/template_url_parser.cc

Issue 373343003: Componentize TemplateURLFetcher (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/search_engines/template_url_parser.cc
diff --git a/chrome/browser/search_engines/template_url_parser.cc b/chrome/browser/search_engines/template_url_parser.cc
deleted file mode 100644
index c338ceb752429d888b2338e5f633ddcada8a7f4d..0000000000000000000000000000000000000000
--- a/chrome/browser/search_engines/template_url_parser.cc
+++ /dev/null
@@ -1,493 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/browser/search_engines/template_url_parser.h"
-
-#include <algorithm>
-#include <map>
-#include <vector>
-
-#include "base/logging.h"
-#include "base/memory/scoped_ptr.h"
-#include "base/strings/string_number_conversions.h"
-#include "base/strings/string_util.h"
-#include "base/strings/utf_string_conversions.h"
-#include "components/search_engines/template_url.h"
-#include "libxml/parser.h"
-#include "libxml/xmlwriter.h"
-#include "ui/gfx/favicon_size.h"
-#include "url/gurl.h"
-#include "url/url_constants.h"
-
-namespace {
-
-// NOTE: libxml uses the UTF-8 encoding. As 0-127 of UTF-8 corresponds
-// to that of char, the following names are all in terms of char. This avoids
-// having to convert to wide, then do comparisons.
-
-// Defines for element names of the OSD document:
-const char kURLElement[] = "Url";
-const char kParamElement[] = "Param";
-const char kShortNameElement[] = "ShortName";
-const char kImageElement[] = "Image";
-const char kOpenSearchDescriptionElement[] = "OpenSearchDescription";
-const char kFirefoxSearchDescriptionElement[] = "SearchPlugin";
-const char kInputEncodingElement[] = "InputEncoding";
-
-// Various XML attributes used.
-const char kURLTypeAttribute[] = "type";
-const char kURLTemplateAttribute[] = "template";
-const char kImageTypeAttribute[] = "type";
-const char kImageWidthAttribute[] = "width";
-const char kImageHeightAttribute[] = "height";
-const char kParamNameAttribute[] = "name";
-const char kParamValueAttribute[] = "value";
-const char kParamMethodAttribute[] = "method";
-
-// Mime type for search results.
-const char kHTMLType[] = "text/html";
-
-// Mime type for as you type suggestions.
-const char kSuggestionType[] = "application/x-suggestions+json";
-
-std::string XMLCharToString(const xmlChar* value) {
- return std::string(reinterpret_cast<const char*>(value));
-}
-
-// Returns true if input_encoding contains a valid input encoding string. This
-// doesn't verify that we have a valid encoding for the string, just that the
-// string contains characters that constitute a valid input encoding.
-bool IsValidEncodingString(const std::string& input_encoding) {
- if (input_encoding.empty())
- return false;
-
- if (!IsAsciiAlpha(input_encoding[0]))
- return false;
-
- for (size_t i = 1, max = input_encoding.size(); i < max; ++i) {
- char c = input_encoding[i];
- if (!IsAsciiAlpha(c) && !IsAsciiDigit(c) && c != '.' && c != '_' &&
- c != '-') {
- return false;
- }
- }
- return true;
-}
-
-void AppendParamToQuery(const std::string& key,
- const std::string& value,
- std::string* query) {
- if (!query->empty())
- query->append("&");
- if (!key.empty()) {
- query->append(key);
- query->append("=");
- }
- query->append(value);
-}
-
-// Returns true if |url| is empty or is a valid URL with a scheme of HTTP[S].
-bool IsHTTPRef(const std::string& url) {
- if (url.empty())
- return true;
- GURL gurl(url);
- return gurl.is_valid() && (gurl.SchemeIs(url::kHttpScheme) ||
- gurl.SchemeIs(url::kHttpsScheme));
-}
-
-} // namespace
-
-
-// TemplateURLParsingContext --------------------------------------------------
-
-// To minimize memory overhead while parsing, a SAX style parser is used.
-// TemplateURLParsingContext is used to maintain the state we're in the document
-// while parsing.
-class TemplateURLParsingContext {
- public:
- // Enum of the known element types.
- enum ElementType {
- UNKNOWN,
- OPEN_SEARCH_DESCRIPTION,
- URL,
- PARAM,
- SHORT_NAME,
- IMAGE,
- INPUT_ENCODING,
- };
-
- enum Method {
- GET,
- POST
- };
-
- // Key/value of a Param node.
- typedef std::pair<std::string, std::string> Param;
-
- explicit TemplateURLParsingContext(
- TemplateURLParser::ParameterFilter* parameter_filter);
-
- static void StartElementImpl(void* ctx,
- const xmlChar* name,
- const xmlChar** atts);
- static void EndElementImpl(void* ctx, const xmlChar* name);
- static void CharactersImpl(void* ctx, const xmlChar* ch, int len);
-
- // Returns a heap-allocated TemplateURL representing the result of parsing.
- // This will be NULL if parsing failed or if the results were invalid for some
- // reason (e.g. the resulting URL was not HTTP[S], a name wasn't supplied,
- // a resulting TemplateURLRef was invalid, etc.).
- TemplateURL* GetTemplateURL(const SearchTermsData& search_terms_data,
- bool show_in_default_list);
-
- private:
- // Key is UTF8 encoded.
- typedef std::map<std::string, ElementType> ElementNameToElementTypeMap;
-
- static void InitMapping();
-
- void ParseURL(const xmlChar** atts);
- void ParseImage(const xmlChar** atts);
- void ParseParam(const xmlChar** atts);
- void ProcessURLParams();
-
- // Returns the current ElementType.
- ElementType GetKnownType();
-
- static ElementNameToElementTypeMap* kElementNameToElementTypeMap;
-
- // Data that gets updated as we parse, and is converted to a TemplateURL by
- // GetTemplateURL().
- TemplateURLData data_;
-
- std::vector<ElementType> elements_;
- bool image_is_valid_for_favicon_;
-
- // Character content for the current element.
- base::string16 string_;
-
- TemplateURLParser::ParameterFilter* parameter_filter_;
-
- // The list of parameters parsed in the Param nodes of a Url node.
- std::vector<Param> extra_params_;
-
- // The HTTP methods used.
- Method method_;
- Method suggestion_method_;
-
- // If true, we are currently parsing a suggest URL, otherwise it is an HTML
- // search. Note that we don't need a stack as URL nodes cannot be nested.
- bool is_suggest_url_;
-
- // Whether we should derive the image from the URL (when images are data
- // URLs).
- bool derive_image_from_url_;
-
- DISALLOW_COPY_AND_ASSIGN(TemplateURLParsingContext);
-};
-
-// static
-TemplateURLParsingContext::ElementNameToElementTypeMap*
- TemplateURLParsingContext::kElementNameToElementTypeMap = NULL;
-
-TemplateURLParsingContext::TemplateURLParsingContext(
- TemplateURLParser::ParameterFilter* parameter_filter)
- : image_is_valid_for_favicon_(false),
- parameter_filter_(parameter_filter),
- method_(GET),
- suggestion_method_(GET),
- is_suggest_url_(false),
- derive_image_from_url_(false) {
- if (kElementNameToElementTypeMap == NULL)
- InitMapping();
-}
-
-// static
-void TemplateURLParsingContext::StartElementImpl(void* ctx,
- const xmlChar* name,
- const xmlChar** atts) {
- // Remove the namespace from |name|, ex: os:Url -> Url.
- std::string node_name(XMLCharToString(name));
- size_t index = node_name.find_first_of(":");
- if (index != std::string::npos)
- node_name.erase(0, index + 1);
-
- TemplateURLParsingContext* context =
- reinterpret_cast<TemplateURLParsingContext*>(ctx);
- context->elements_.push_back(
- context->kElementNameToElementTypeMap->count(node_name) ?
- (*context->kElementNameToElementTypeMap)[node_name] : UNKNOWN);
- switch (context->GetKnownType()) {
- case TemplateURLParsingContext::URL:
- context->extra_params_.clear();
- context->ParseURL(atts);
- break;
- case TemplateURLParsingContext::IMAGE:
- context->ParseImage(atts);
- break;
- case TemplateURLParsingContext::PARAM:
- context->ParseParam(atts);
- break;
- default:
- break;
- }
- context->string_.clear();
-}
-
-// static
-void TemplateURLParsingContext::EndElementImpl(void* ctx, const xmlChar* name) {
- TemplateURLParsingContext* context =
- reinterpret_cast<TemplateURLParsingContext*>(ctx);
- switch (context->GetKnownType()) {
- case TemplateURLParsingContext::SHORT_NAME:
- context->data_.short_name = context->string_;
- break;
- case TemplateURLParsingContext::IMAGE: {
- GURL image_url(base::UTF16ToUTF8(context->string_));
- if (image_url.SchemeIs(url::kDataScheme)) {
- // TODO (jcampan): bug 1169256: when dealing with data URL, we need to
- // decode the data URL in the renderer. For now, we'll just point to the
- // favicon from the URL.
- context->derive_image_from_url_ = true;
- } else if (context->image_is_valid_for_favicon_ && image_url.is_valid() &&
- (image_url.SchemeIs(url::kHttpScheme) ||
- image_url.SchemeIs(url::kHttpsScheme))) {
- context->data_.favicon_url = image_url;
- }
- context->image_is_valid_for_favicon_ = false;
- break;
- }
- case TemplateURLParsingContext::INPUT_ENCODING: {
- std::string input_encoding = base::UTF16ToASCII(context->string_);
- if (IsValidEncodingString(input_encoding))
- context->data_.input_encodings.push_back(input_encoding);
- break;
- }
- case TemplateURLParsingContext::URL:
- context->ProcessURLParams();
- break;
- default:
- break;
- }
- context->string_.clear();
- context->elements_.pop_back();
-}
-
-// static
-void TemplateURLParsingContext::CharactersImpl(void* ctx,
- const xmlChar* ch,
- int len) {
- reinterpret_cast<TemplateURLParsingContext*>(ctx)->string_ +=
- base::UTF8ToUTF16(std::string(reinterpret_cast<const char*>(ch), len));
-}
-
-TemplateURL* TemplateURLParsingContext::GetTemplateURL(
- const SearchTermsData& search_terms_data,
- bool show_in_default_list) {
- // TODO(jcampan): Support engines that use POST; see http://crbug.com/18107
- if (method_ == TemplateURLParsingContext::POST || data_.short_name.empty() ||
- !IsHTTPRef(data_.url()) || !IsHTTPRef(data_.suggestions_url))
- return NULL;
- if (suggestion_method_ == TemplateURLParsingContext::POST)
- data_.suggestions_url.clear();
-
- // If the image was a data URL, use the favicon from the search URL instead.
- // (see the TODO in EndElementImpl()).
- GURL search_url(data_.url());
- if (derive_image_from_url_ && data_.favicon_url.is_empty())
- data_.favicon_url = TemplateURL::GenerateFaviconURL(search_url);
-
- data_.SetKeyword(TemplateURL::GenerateKeyword(search_url));
- data_.show_in_default_list = show_in_default_list;
-
- // Bail if the search URL is empty or if either TemplateURLRef is invalid.
- scoped_ptr<TemplateURL> template_url(new TemplateURL(data_));
- if (template_url->url().empty() ||
- !template_url->url_ref().IsValid(search_terms_data) ||
- (!template_url->suggestions_url().empty() &&
- !template_url->suggestions_url_ref().IsValid(search_terms_data))) {
- return NULL;
- }
-
- return template_url.release();
-}
-
-// static
-void TemplateURLParsingContext::InitMapping() {
- kElementNameToElementTypeMap = new std::map<std::string, ElementType>;
- (*kElementNameToElementTypeMap)[kURLElement] = URL;
- (*kElementNameToElementTypeMap)[kParamElement] = PARAM;
- (*kElementNameToElementTypeMap)[kShortNameElement] = SHORT_NAME;
- (*kElementNameToElementTypeMap)[kImageElement] = IMAGE;
- (*kElementNameToElementTypeMap)[kOpenSearchDescriptionElement] =
- OPEN_SEARCH_DESCRIPTION;
- (*kElementNameToElementTypeMap)[kFirefoxSearchDescriptionElement] =
- OPEN_SEARCH_DESCRIPTION;
- (*kElementNameToElementTypeMap)[kInputEncodingElement] = INPUT_ENCODING;
-}
-
-void TemplateURLParsingContext::ParseURL(const xmlChar** atts) {
- if (!atts)
- return;
-
- std::string template_url;
- bool is_post = false;
- bool is_html_url = false;
- bool is_suggest_url = false;
- for (; *atts; atts += 2) {
- std::string name(XMLCharToString(*atts));
- const xmlChar* value = atts[1];
- if (name == kURLTypeAttribute) {
- std::string type = XMLCharToString(value);
- is_html_url = (type == kHTMLType);
- is_suggest_url = (type == kSuggestionType);
- } else if (name == kURLTemplateAttribute) {
- template_url = XMLCharToString(value);
- } else if (name == kParamMethodAttribute) {
- is_post = LowerCaseEqualsASCII(XMLCharToString(value), "post");
- }
- }
-
- if (is_html_url && !template_url.empty()) {
- data_.SetURL(template_url);
- is_suggest_url_ = false;
- if (is_post)
- method_ = POST;
- } else if (is_suggest_url) {
- data_.suggestions_url = template_url;
- is_suggest_url_ = true;
- if (is_post)
- suggestion_method_ = POST;
- }
-}
-
-void TemplateURLParsingContext::ParseImage(const xmlChar** atts) {
- if (!atts)
- return;
-
- int width = 0;
- int height = 0;
- std::string type;
- for (; *atts; atts += 2) {
- std::string name(XMLCharToString(*atts));
- const xmlChar* value = atts[1];
- if (name == kImageTypeAttribute) {
- type = XMLCharToString(value);
- } else if (name == kImageWidthAttribute) {
- base::StringToInt(XMLCharToString(value), &width);
- } else if (name == kImageHeightAttribute) {
- base::StringToInt(XMLCharToString(value), &height);
- }
- }
-
- image_is_valid_for_favicon_ = (width == gfx::kFaviconSize) &&
- (height == gfx::kFaviconSize) &&
- ((type == "image/x-icon") || (type == "image/vnd.microsoft.icon"));
-}
-
-void TemplateURLParsingContext::ParseParam(const xmlChar** atts) {
- if (!atts)
- return;
-
- std::string key, value;
- for (; *atts; atts += 2) {
- std::string name(XMLCharToString(*atts));
- const xmlChar* val = atts[1];
- if (name == kParamNameAttribute) {
- key = XMLCharToString(val);
- } else if (name == kParamValueAttribute) {
- value = XMLCharToString(val);
- }
- }
-
- if (!key.empty() &&
- (!parameter_filter_ || parameter_filter_->KeepParameter(key, value)))
- extra_params_.push_back(Param(key, value));
-}
-
-void TemplateURLParsingContext::ProcessURLParams() {
- if (!parameter_filter_ && extra_params_.empty())
- return;
-
- GURL url(is_suggest_url_ ? data_.suggestions_url : data_.url());
- if (url.is_empty())
- return;
-
- // If there is a parameter filter, parse the existing URL and remove any
- // unwanted parameter.
- std::string new_query;
- bool modified = false;
- if (parameter_filter_) {
- url::Component query = url.parsed_for_possibly_invalid_spec().query;
- url::Component key, value;
- const char* url_spec = url.spec().c_str();
- while (url::ExtractQueryKeyValue(url_spec, &query, &key, &value)) {
- std::string key_str(url_spec, key.begin, key.len);
- std::string value_str(url_spec, value.begin, value.len);
- if (parameter_filter_->KeepParameter(key_str, value_str)) {
- AppendParamToQuery(key_str, value_str, &new_query);
- } else {
- modified = true;
- }
- }
- }
- if (!modified)
- new_query = url.query();
-
- // Add the extra parameters if any.
- if (!extra_params_.empty()) {
- modified = true;
- for (std::vector<Param>::const_iterator iter(extra_params_.begin());
- iter != extra_params_.end(); ++iter)
- AppendParamToQuery(iter->first, iter->second, &new_query);
- }
-
- if (modified) {
- GURL::Replacements repl;
- repl.SetQueryStr(new_query);
- url = url.ReplaceComponents(repl);
- if (is_suggest_url_)
- data_.suggestions_url = url.spec();
- else if (url.is_valid())
- data_.SetURL(url.spec());
- }
-}
-
-TemplateURLParsingContext::ElementType
- TemplateURLParsingContext::GetKnownType() {
- if (elements_.size() == 2 && elements_[0] == OPEN_SEARCH_DESCRIPTION)
- return elements_[1];
- // We only expect PARAM nodes under the URL node.
- return (elements_.size() == 3 && elements_[0] == OPEN_SEARCH_DESCRIPTION &&
- elements_[1] == URL && elements_[2] == PARAM) ? PARAM : UNKNOWN;
-}
-
-
-// TemplateURLParser ----------------------------------------------------------
-
-// static
-TemplateURL* TemplateURLParser::Parse(
- const SearchTermsData& search_terms_data,
- bool show_in_default_list,
- const char* data,
- size_t length,
- TemplateURLParser::ParameterFilter* param_filter) {
- // xmlSubstituteEntitiesDefault(1) makes it so that &amp; isn't mapped to
- // &#38; . Unfortunately xmlSubstituteEntitiesDefault affects global state.
- // If this becomes problematic we'll need to provide our own entity
- // type for &amp;, or strip out &#38; by hand after parsing.
- int last_sub_entities_value = xmlSubstituteEntitiesDefault(1);
- TemplateURLParsingContext context(param_filter);
- xmlSAXHandler sax_handler;
- memset(&sax_handler, 0, sizeof(sax_handler));
- sax_handler.startElement = &TemplateURLParsingContext::StartElementImpl;
- sax_handler.endElement = &TemplateURLParsingContext::EndElementImpl;
- sax_handler.characters = &TemplateURLParsingContext::CharactersImpl;
- int error = xmlSAXUserParseMemory(&sax_handler, &context, data,
- static_cast<int>(length));
- xmlSubstituteEntitiesDefault(last_sub_entities_value);
-
- return error ?
- NULL : context.GetTemplateURL(search_terms_data, show_in_default_list);
-}
« no previous file with comments | « chrome/browser/search_engines/template_url_parser.h ('k') | chrome/browser/search_engines/template_url_parser_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698