| Index: chrome/browser/search_engines/template_url_parser.cc
|
| diff --git a/chrome/browser/search_engines/template_url_parser.cc b/chrome/browser/search_engines/template_url_parser.cc
|
| deleted file mode 100644
|
| index c338ceb752429d888b2338e5f633ddcada8a7f4d..0000000000000000000000000000000000000000
|
| --- a/chrome/browser/search_engines/template_url_parser.cc
|
| +++ /dev/null
|
| @@ -1,493 +0,0 @@
|
| -// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "chrome/browser/search_engines/template_url_parser.h"
|
| -
|
| -#include <algorithm>
|
| -#include <map>
|
| -#include <vector>
|
| -
|
| -#include "base/logging.h"
|
| -#include "base/memory/scoped_ptr.h"
|
| -#include "base/strings/string_number_conversions.h"
|
| -#include "base/strings/string_util.h"
|
| -#include "base/strings/utf_string_conversions.h"
|
| -#include "components/search_engines/template_url.h"
|
| -#include "libxml/parser.h"
|
| -#include "libxml/xmlwriter.h"
|
| -#include "ui/gfx/favicon_size.h"
|
| -#include "url/gurl.h"
|
| -#include "url/url_constants.h"
|
| -
|
| -namespace {
|
| -
|
| -// NOTE: libxml uses the UTF-8 encoding. As 0-127 of UTF-8 corresponds
|
| -// to that of char, the following names are all in terms of char. This avoids
|
| -// having to convert to wide, then do comparisons.
|
| -
|
| -// Defines for element names of the OSD document:
|
| -const char kURLElement[] = "Url";
|
| -const char kParamElement[] = "Param";
|
| -const char kShortNameElement[] = "ShortName";
|
| -const char kImageElement[] = "Image";
|
| -const char kOpenSearchDescriptionElement[] = "OpenSearchDescription";
|
| -const char kFirefoxSearchDescriptionElement[] = "SearchPlugin";
|
| -const char kInputEncodingElement[] = "InputEncoding";
|
| -
|
| -// Various XML attributes used.
|
| -const char kURLTypeAttribute[] = "type";
|
| -const char kURLTemplateAttribute[] = "template";
|
| -const char kImageTypeAttribute[] = "type";
|
| -const char kImageWidthAttribute[] = "width";
|
| -const char kImageHeightAttribute[] = "height";
|
| -const char kParamNameAttribute[] = "name";
|
| -const char kParamValueAttribute[] = "value";
|
| -const char kParamMethodAttribute[] = "method";
|
| -
|
| -// Mime type for search results.
|
| -const char kHTMLType[] = "text/html";
|
| -
|
| -// Mime type for as you type suggestions.
|
| -const char kSuggestionType[] = "application/x-suggestions+json";
|
| -
|
| -std::string XMLCharToString(const xmlChar* value) {
|
| - return std::string(reinterpret_cast<const char*>(value));
|
| -}
|
| -
|
| -// Returns true if input_encoding contains a valid input encoding string. This
|
| -// doesn't verify that we have a valid encoding for the string, just that the
|
| -// string contains characters that constitute a valid input encoding.
|
| -bool IsValidEncodingString(const std::string& input_encoding) {
|
| - if (input_encoding.empty())
|
| - return false;
|
| -
|
| - if (!IsAsciiAlpha(input_encoding[0]))
|
| - return false;
|
| -
|
| - for (size_t i = 1, max = input_encoding.size(); i < max; ++i) {
|
| - char c = input_encoding[i];
|
| - if (!IsAsciiAlpha(c) && !IsAsciiDigit(c) && c != '.' && c != '_' &&
|
| - c != '-') {
|
| - return false;
|
| - }
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -void AppendParamToQuery(const std::string& key,
|
| - const std::string& value,
|
| - std::string* query) {
|
| - if (!query->empty())
|
| - query->append("&");
|
| - if (!key.empty()) {
|
| - query->append(key);
|
| - query->append("=");
|
| - }
|
| - query->append(value);
|
| -}
|
| -
|
| -// Returns true if |url| is empty or is a valid URL with a scheme of HTTP[S].
|
| -bool IsHTTPRef(const std::string& url) {
|
| - if (url.empty())
|
| - return true;
|
| - GURL gurl(url);
|
| - return gurl.is_valid() && (gurl.SchemeIs(url::kHttpScheme) ||
|
| - gurl.SchemeIs(url::kHttpsScheme));
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| -
|
| -// TemplateURLParsingContext --------------------------------------------------
|
| -
|
| -// To minimize memory overhead while parsing, a SAX style parser is used.
|
| -// TemplateURLParsingContext is used to maintain the state we're in the document
|
| -// while parsing.
|
| -class TemplateURLParsingContext {
|
| - public:
|
| - // Enum of the known element types.
|
| - enum ElementType {
|
| - UNKNOWN,
|
| - OPEN_SEARCH_DESCRIPTION,
|
| - URL,
|
| - PARAM,
|
| - SHORT_NAME,
|
| - IMAGE,
|
| - INPUT_ENCODING,
|
| - };
|
| -
|
| - enum Method {
|
| - GET,
|
| - POST
|
| - };
|
| -
|
| - // Key/value of a Param node.
|
| - typedef std::pair<std::string, std::string> Param;
|
| -
|
| - explicit TemplateURLParsingContext(
|
| - TemplateURLParser::ParameterFilter* parameter_filter);
|
| -
|
| - static void StartElementImpl(void* ctx,
|
| - const xmlChar* name,
|
| - const xmlChar** atts);
|
| - static void EndElementImpl(void* ctx, const xmlChar* name);
|
| - static void CharactersImpl(void* ctx, const xmlChar* ch, int len);
|
| -
|
| - // Returns a heap-allocated TemplateURL representing the result of parsing.
|
| - // This will be NULL if parsing failed or if the results were invalid for some
|
| - // reason (e.g. the resulting URL was not HTTP[S], a name wasn't supplied,
|
| - // a resulting TemplateURLRef was invalid, etc.).
|
| - TemplateURL* GetTemplateURL(const SearchTermsData& search_terms_data,
|
| - bool show_in_default_list);
|
| -
|
| - private:
|
| - // Key is UTF8 encoded.
|
| - typedef std::map<std::string, ElementType> ElementNameToElementTypeMap;
|
| -
|
| - static void InitMapping();
|
| -
|
| - void ParseURL(const xmlChar** atts);
|
| - void ParseImage(const xmlChar** atts);
|
| - void ParseParam(const xmlChar** atts);
|
| - void ProcessURLParams();
|
| -
|
| - // Returns the current ElementType.
|
| - ElementType GetKnownType();
|
| -
|
| - static ElementNameToElementTypeMap* kElementNameToElementTypeMap;
|
| -
|
| - // Data that gets updated as we parse, and is converted to a TemplateURL by
|
| - // GetTemplateURL().
|
| - TemplateURLData data_;
|
| -
|
| - std::vector<ElementType> elements_;
|
| - bool image_is_valid_for_favicon_;
|
| -
|
| - // Character content for the current element.
|
| - base::string16 string_;
|
| -
|
| - TemplateURLParser::ParameterFilter* parameter_filter_;
|
| -
|
| - // The list of parameters parsed in the Param nodes of a Url node.
|
| - std::vector<Param> extra_params_;
|
| -
|
| - // The HTTP methods used.
|
| - Method method_;
|
| - Method suggestion_method_;
|
| -
|
| - // If true, we are currently parsing a suggest URL, otherwise it is an HTML
|
| - // search. Note that we don't need a stack as URL nodes cannot be nested.
|
| - bool is_suggest_url_;
|
| -
|
| - // Whether we should derive the image from the URL (when images are data
|
| - // URLs).
|
| - bool derive_image_from_url_;
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(TemplateURLParsingContext);
|
| -};
|
| -
|
| -// static
|
| -TemplateURLParsingContext::ElementNameToElementTypeMap*
|
| - TemplateURLParsingContext::kElementNameToElementTypeMap = NULL;
|
| -
|
| -TemplateURLParsingContext::TemplateURLParsingContext(
|
| - TemplateURLParser::ParameterFilter* parameter_filter)
|
| - : image_is_valid_for_favicon_(false),
|
| - parameter_filter_(parameter_filter),
|
| - method_(GET),
|
| - suggestion_method_(GET),
|
| - is_suggest_url_(false),
|
| - derive_image_from_url_(false) {
|
| - if (kElementNameToElementTypeMap == NULL)
|
| - InitMapping();
|
| -}
|
| -
|
| -// static
|
| -void TemplateURLParsingContext::StartElementImpl(void* ctx,
|
| - const xmlChar* name,
|
| - const xmlChar** atts) {
|
| - // Remove the namespace from |name|, ex: os:Url -> Url.
|
| - std::string node_name(XMLCharToString(name));
|
| - size_t index = node_name.find_first_of(":");
|
| - if (index != std::string::npos)
|
| - node_name.erase(0, index + 1);
|
| -
|
| - TemplateURLParsingContext* context =
|
| - reinterpret_cast<TemplateURLParsingContext*>(ctx);
|
| - context->elements_.push_back(
|
| - context->kElementNameToElementTypeMap->count(node_name) ?
|
| - (*context->kElementNameToElementTypeMap)[node_name] : UNKNOWN);
|
| - switch (context->GetKnownType()) {
|
| - case TemplateURLParsingContext::URL:
|
| - context->extra_params_.clear();
|
| - context->ParseURL(atts);
|
| - break;
|
| - case TemplateURLParsingContext::IMAGE:
|
| - context->ParseImage(atts);
|
| - break;
|
| - case TemplateURLParsingContext::PARAM:
|
| - context->ParseParam(atts);
|
| - break;
|
| - default:
|
| - break;
|
| - }
|
| - context->string_.clear();
|
| -}
|
| -
|
| -// static
|
| -void TemplateURLParsingContext::EndElementImpl(void* ctx, const xmlChar* name) {
|
| - TemplateURLParsingContext* context =
|
| - reinterpret_cast<TemplateURLParsingContext*>(ctx);
|
| - switch (context->GetKnownType()) {
|
| - case TemplateURLParsingContext::SHORT_NAME:
|
| - context->data_.short_name = context->string_;
|
| - break;
|
| - case TemplateURLParsingContext::IMAGE: {
|
| - GURL image_url(base::UTF16ToUTF8(context->string_));
|
| - if (image_url.SchemeIs(url::kDataScheme)) {
|
| - // TODO (jcampan): bug 1169256: when dealing with data URL, we need to
|
| - // decode the data URL in the renderer. For now, we'll just point to the
|
| - // favicon from the URL.
|
| - context->derive_image_from_url_ = true;
|
| - } else if (context->image_is_valid_for_favicon_ && image_url.is_valid() &&
|
| - (image_url.SchemeIs(url::kHttpScheme) ||
|
| - image_url.SchemeIs(url::kHttpsScheme))) {
|
| - context->data_.favicon_url = image_url;
|
| - }
|
| - context->image_is_valid_for_favicon_ = false;
|
| - break;
|
| - }
|
| - case TemplateURLParsingContext::INPUT_ENCODING: {
|
| - std::string input_encoding = base::UTF16ToASCII(context->string_);
|
| - if (IsValidEncodingString(input_encoding))
|
| - context->data_.input_encodings.push_back(input_encoding);
|
| - break;
|
| - }
|
| - case TemplateURLParsingContext::URL:
|
| - context->ProcessURLParams();
|
| - break;
|
| - default:
|
| - break;
|
| - }
|
| - context->string_.clear();
|
| - context->elements_.pop_back();
|
| -}
|
| -
|
| -// static
|
| -void TemplateURLParsingContext::CharactersImpl(void* ctx,
|
| - const xmlChar* ch,
|
| - int len) {
|
| - reinterpret_cast<TemplateURLParsingContext*>(ctx)->string_ +=
|
| - base::UTF8ToUTF16(std::string(reinterpret_cast<const char*>(ch), len));
|
| -}
|
| -
|
| -TemplateURL* TemplateURLParsingContext::GetTemplateURL(
|
| - const SearchTermsData& search_terms_data,
|
| - bool show_in_default_list) {
|
| - // TODO(jcampan): Support engines that use POST; see http://crbug.com/18107
|
| - if (method_ == TemplateURLParsingContext::POST || data_.short_name.empty() ||
|
| - !IsHTTPRef(data_.url()) || !IsHTTPRef(data_.suggestions_url))
|
| - return NULL;
|
| - if (suggestion_method_ == TemplateURLParsingContext::POST)
|
| - data_.suggestions_url.clear();
|
| -
|
| - // If the image was a data URL, use the favicon from the search URL instead.
|
| - // (see the TODO in EndElementImpl()).
|
| - GURL search_url(data_.url());
|
| - if (derive_image_from_url_ && data_.favicon_url.is_empty())
|
| - data_.favicon_url = TemplateURL::GenerateFaviconURL(search_url);
|
| -
|
| - data_.SetKeyword(TemplateURL::GenerateKeyword(search_url));
|
| - data_.show_in_default_list = show_in_default_list;
|
| -
|
| - // Bail if the search URL is empty or if either TemplateURLRef is invalid.
|
| - scoped_ptr<TemplateURL> template_url(new TemplateURL(data_));
|
| - if (template_url->url().empty() ||
|
| - !template_url->url_ref().IsValid(search_terms_data) ||
|
| - (!template_url->suggestions_url().empty() &&
|
| - !template_url->suggestions_url_ref().IsValid(search_terms_data))) {
|
| - return NULL;
|
| - }
|
| -
|
| - return template_url.release();
|
| -}
|
| -
|
| -// static
|
| -void TemplateURLParsingContext::InitMapping() {
|
| - kElementNameToElementTypeMap = new std::map<std::string, ElementType>;
|
| - (*kElementNameToElementTypeMap)[kURLElement] = URL;
|
| - (*kElementNameToElementTypeMap)[kParamElement] = PARAM;
|
| - (*kElementNameToElementTypeMap)[kShortNameElement] = SHORT_NAME;
|
| - (*kElementNameToElementTypeMap)[kImageElement] = IMAGE;
|
| - (*kElementNameToElementTypeMap)[kOpenSearchDescriptionElement] =
|
| - OPEN_SEARCH_DESCRIPTION;
|
| - (*kElementNameToElementTypeMap)[kFirefoxSearchDescriptionElement] =
|
| - OPEN_SEARCH_DESCRIPTION;
|
| - (*kElementNameToElementTypeMap)[kInputEncodingElement] = INPUT_ENCODING;
|
| -}
|
| -
|
| -void TemplateURLParsingContext::ParseURL(const xmlChar** atts) {
|
| - if (!atts)
|
| - return;
|
| -
|
| - std::string template_url;
|
| - bool is_post = false;
|
| - bool is_html_url = false;
|
| - bool is_suggest_url = false;
|
| - for (; *atts; atts += 2) {
|
| - std::string name(XMLCharToString(*atts));
|
| - const xmlChar* value = atts[1];
|
| - if (name == kURLTypeAttribute) {
|
| - std::string type = XMLCharToString(value);
|
| - is_html_url = (type == kHTMLType);
|
| - is_suggest_url = (type == kSuggestionType);
|
| - } else if (name == kURLTemplateAttribute) {
|
| - template_url = XMLCharToString(value);
|
| - } else if (name == kParamMethodAttribute) {
|
| - is_post = LowerCaseEqualsASCII(XMLCharToString(value), "post");
|
| - }
|
| - }
|
| -
|
| - if (is_html_url && !template_url.empty()) {
|
| - data_.SetURL(template_url);
|
| - is_suggest_url_ = false;
|
| - if (is_post)
|
| - method_ = POST;
|
| - } else if (is_suggest_url) {
|
| - data_.suggestions_url = template_url;
|
| - is_suggest_url_ = true;
|
| - if (is_post)
|
| - suggestion_method_ = POST;
|
| - }
|
| -}
|
| -
|
| -void TemplateURLParsingContext::ParseImage(const xmlChar** atts) {
|
| - if (!atts)
|
| - return;
|
| -
|
| - int width = 0;
|
| - int height = 0;
|
| - std::string type;
|
| - for (; *atts; atts += 2) {
|
| - std::string name(XMLCharToString(*atts));
|
| - const xmlChar* value = atts[1];
|
| - if (name == kImageTypeAttribute) {
|
| - type = XMLCharToString(value);
|
| - } else if (name == kImageWidthAttribute) {
|
| - base::StringToInt(XMLCharToString(value), &width);
|
| - } else if (name == kImageHeightAttribute) {
|
| - base::StringToInt(XMLCharToString(value), &height);
|
| - }
|
| - }
|
| -
|
| - image_is_valid_for_favicon_ = (width == gfx::kFaviconSize) &&
|
| - (height == gfx::kFaviconSize) &&
|
| - ((type == "image/x-icon") || (type == "image/vnd.microsoft.icon"));
|
| -}
|
| -
|
| -void TemplateURLParsingContext::ParseParam(const xmlChar** atts) {
|
| - if (!atts)
|
| - return;
|
| -
|
| - std::string key, value;
|
| - for (; *atts; atts += 2) {
|
| - std::string name(XMLCharToString(*atts));
|
| - const xmlChar* val = atts[1];
|
| - if (name == kParamNameAttribute) {
|
| - key = XMLCharToString(val);
|
| - } else if (name == kParamValueAttribute) {
|
| - value = XMLCharToString(val);
|
| - }
|
| - }
|
| -
|
| - if (!key.empty() &&
|
| - (!parameter_filter_ || parameter_filter_->KeepParameter(key, value)))
|
| - extra_params_.push_back(Param(key, value));
|
| -}
|
| -
|
| -void TemplateURLParsingContext::ProcessURLParams() {
|
| - if (!parameter_filter_ && extra_params_.empty())
|
| - return;
|
| -
|
| - GURL url(is_suggest_url_ ? data_.suggestions_url : data_.url());
|
| - if (url.is_empty())
|
| - return;
|
| -
|
| - // If there is a parameter filter, parse the existing URL and remove any
|
| - // unwanted parameter.
|
| - std::string new_query;
|
| - bool modified = false;
|
| - if (parameter_filter_) {
|
| - url::Component query = url.parsed_for_possibly_invalid_spec().query;
|
| - url::Component key, value;
|
| - const char* url_spec = url.spec().c_str();
|
| - while (url::ExtractQueryKeyValue(url_spec, &query, &key, &value)) {
|
| - std::string key_str(url_spec, key.begin, key.len);
|
| - std::string value_str(url_spec, value.begin, value.len);
|
| - if (parameter_filter_->KeepParameter(key_str, value_str)) {
|
| - AppendParamToQuery(key_str, value_str, &new_query);
|
| - } else {
|
| - modified = true;
|
| - }
|
| - }
|
| - }
|
| - if (!modified)
|
| - new_query = url.query();
|
| -
|
| - // Add the extra parameters if any.
|
| - if (!extra_params_.empty()) {
|
| - modified = true;
|
| - for (std::vector<Param>::const_iterator iter(extra_params_.begin());
|
| - iter != extra_params_.end(); ++iter)
|
| - AppendParamToQuery(iter->first, iter->second, &new_query);
|
| - }
|
| -
|
| - if (modified) {
|
| - GURL::Replacements repl;
|
| - repl.SetQueryStr(new_query);
|
| - url = url.ReplaceComponents(repl);
|
| - if (is_suggest_url_)
|
| - data_.suggestions_url = url.spec();
|
| - else if (url.is_valid())
|
| - data_.SetURL(url.spec());
|
| - }
|
| -}
|
| -
|
| -TemplateURLParsingContext::ElementType
|
| - TemplateURLParsingContext::GetKnownType() {
|
| - if (elements_.size() == 2 && elements_[0] == OPEN_SEARCH_DESCRIPTION)
|
| - return elements_[1];
|
| - // We only expect PARAM nodes under the URL node.
|
| - return (elements_.size() == 3 && elements_[0] == OPEN_SEARCH_DESCRIPTION &&
|
| - elements_[1] == URL && elements_[2] == PARAM) ? PARAM : UNKNOWN;
|
| -}
|
| -
|
| -
|
| -// TemplateURLParser ----------------------------------------------------------
|
| -
|
| -// static
|
| -TemplateURL* TemplateURLParser::Parse(
|
| - const SearchTermsData& search_terms_data,
|
| - bool show_in_default_list,
|
| - const char* data,
|
| - size_t length,
|
| - TemplateURLParser::ParameterFilter* param_filter) {
|
| - // xmlSubstituteEntitiesDefault(1) makes it so that & isn't mapped to
|
| - // & . Unfortunately xmlSubstituteEntitiesDefault affects global state.
|
| - // If this becomes problematic we'll need to provide our own entity
|
| - // type for &, or strip out & by hand after parsing.
|
| - int last_sub_entities_value = xmlSubstituteEntitiesDefault(1);
|
| - TemplateURLParsingContext context(param_filter);
|
| - xmlSAXHandler sax_handler;
|
| - memset(&sax_handler, 0, sizeof(sax_handler));
|
| - sax_handler.startElement = &TemplateURLParsingContext::StartElementImpl;
|
| - sax_handler.endElement = &TemplateURLParsingContext::EndElementImpl;
|
| - sax_handler.characters = &TemplateURLParsingContext::CharactersImpl;
|
| - int error = xmlSAXUserParseMemory(&sax_handler, &context, data,
|
| - static_cast<int>(length));
|
| - xmlSubstituteEntitiesDefault(last_sub_entities_value);
|
| -
|
| - return error ?
|
| - NULL : context.GetTemplateURL(search_terms_data, show_in_default_list);
|
| -}
|
|
|