| Index: components/url_formatter/url_formatter.h
|
| diff --git a/components/url_formatter/url_formatter.h b/components/url_formatter/url_formatter.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..0a9931a7f0c8cce50593e9adc3568dbe4eb7c381
|
| --- /dev/null
|
| +++ b/components/url_formatter/url_formatter.h
|
| @@ -0,0 +1,155 @@
|
| +// Copyright 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +// url_formatter contains routines for formatting URLs in a way that can be
|
| +// safely and securely displayed to users. For example, it is responsible
|
| +// for determining when to convert an IDN A-Label (e.g. "xn--[something]")
|
| +// into the IDN U-Label.
|
| +//
|
| +// Note that this formatting is only intended for display purposes; it would
|
| +// be insecure and insufficient to make comparisons solely on formatted URLs
|
| +// (that is, it should not be used for normalizing URLs for comparison for
|
| +// security decisions).
|
| +
|
| +#ifndef COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
|
| +#define COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
|
| +
|
| +#include <stdint.h>
|
| +
|
| +#include <string>
|
| +#include <vector>
|
| +
|
| +#include "base/strings/string16.h"
|
| +#include "base/strings/utf_offset_string_conversions.h"
|
| +#include "net/base/escape.h"
|
| +
|
| +class GURL;
|
| +
|
| +namespace url {
|
| +struct Parsed;
|
| +} // url
|
| +
|
| +namespace url_formatter {
|
| +
|
| +// Used by FormatUrl to specify handling of certain parts of the url.
|
| +typedef uint32_t FormatUrlType;
|
| +typedef uint32_t FormatUrlTypes;
|
| +
|
| +// Nothing is omitted.
|
| +extern const FormatUrlType kFormatUrlOmitNothing;
|
| +
|
| +// If set, any username and password are removed.
|
| +extern const FormatUrlType kFormatUrlOmitUsernamePassword;
|
| +
|
| +// If the scheme is 'http://', it's removed.
|
| +extern const FormatUrlType kFormatUrlOmitHTTP;
|
| +
|
| +// Omits the path if it is just a slash and there is no query or ref. This is
|
| +// meaningful for non-file "standard" URLs.
|
| +extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
|
| +
|
| +// Convenience for omitting all unnecessary types.
|
| +extern const FormatUrlType kFormatUrlOmitAll;
|
| +
|
| +// Creates a string representation of |url|. The IDN host name may be in Unicode
|
| +// if |languages| accepts the Unicode representation. |format_types| is a
|
| +// bitmask of the kFormatUrl* flags above. |unescape_rules| defines how to clean
|
| +// the URL for human readability. You will generally want |UnescapeRule::SPACES|
|
| +// for display to the user if you can handle spaces, or |UnescapeRule::NORMAL|
|
| +// if not. If the path part and the query part seem to be encoded in %-encoded
|
| +// UTF-8, decodes %-encoding and UTF-8.
|
| +//
|
| +// The last three parameters may be NULL.
|
| +//
|
| +// |new_parsed| will be set to the parsing parameters of the resultant URL.
|
| +//
|
| +// |prefix_end| will be the length before the hostname of the resultant URL.
|
| +//
|
| +// |offset[s]_for_adjustment| specifies one or more offsets into the original
|
| +// URL, representing insertion or selection points between characters: if the
|
| +// input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is
|
| +// between the scheme and the host, and offset 15 is after the end of the URL.
|
| +// Valid input offsets range from 0 to the length of the input URL string. On
|
| +// exit, each offset will have been modified to reflect any changes made to the
|
| +// output string. For example, if |url| is "http://a:b@c.com/",
|
| +// kFormatUrlOmitUsernamePassword is set, and an offset is 12 (between 'c'
|
| +// and '.'), then on return the output string will be "http://c.com/" and the
|
| +// offset will be 8. If an offset cannot be successfully adjusted (e.g. because
|
| +// it points into the middle of a component that was entirely removed or into
|
| +// the middle of an encoding sequence), it will be set to base::string16::npos.
|
| +// For consistency, if an input offset points between the scheme and the
|
| +// username/password, and both are removed, on output this offset will be 0
|
| +// rather than npos; this means that offsets at the starts and ends of removed
|
| +// components are always transformed the same way regardless of what other
|
| +// components are adjacent.
|
| +NET_EXPORT base::string16 FormatUrl(const GURL& url,
|
| + const std::string& languages,
|
| + FormatUrlTypes format_types,
|
| + net::UnescapeRule::Type unescape_rules,
|
| + url::Parsed* new_parsed,
|
| + size_t* prefix_end,
|
| + size_t* offset_for_adjustment);
|
| +
|
| +NET_EXPORT base::string16 FormatUrlWithOffsets(
|
| + const GURL& url,
|
| + const std::string& languages,
|
| + FormatUrlTypes format_types,
|
| + net::UnescapeRule::Type unescape_rules,
|
| + url::Parsed* new_parsed,
|
| + size_t* prefix_end,
|
| + std::vector<size_t>* offsets_for_adjustment);
|
| +
|
| +// This function is like those above except it takes |adjustments| rather
|
| +// than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all
|
| +// the transformations that happened to |url| to convert it into the returned
|
| +// value.
|
| +NET_EXPORT base::string16 FormatUrlWithAdjustments(
|
| + const GURL& url,
|
| + const std::string& languages,
|
| + FormatUrlTypes format_types,
|
| + net::UnescapeRule::Type unescape_rules,
|
| + url::Parsed* new_parsed,
|
| + size_t* prefix_end,
|
| + base::OffsetAdjuster::Adjustments* adjustments);
|
| +
|
| +// Convenience overload of FormatUrl() with format_types = kFormatUrlOmitAll,
|
| +// unescape_rules = SPACES, and nullptr for the last three parameters. This is
|
| +// the typical set of flags for "URLs to display to the user". Be cautious
|
| +// about using this for URLs which will be parsed or sent to other applications.
|
| +inline base::string16 FormatUrl(const GURL& url, const std::string& languages) {
|
| + return FormatUrl(url, languages, kFormatUrlOmitAll, net::UnescapeRule::SPACES,
|
| + nullptr, nullptr, nullptr);
|
| +}
|
| +
|
| +// Returns whether FormatUrl() would strip a trailing slash from |url|, given a
|
| +// format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
|
| +NET_EXPORT bool CanStripTrailingSlash(const GURL& url);
|
| +
|
| +// Formats the host in |url| and appends it to |output|. The host formatter
|
| +// takes the same accept languages component as ElideURL().
|
| +NET_EXPORT void AppendFormattedHost(const GURL& url,
|
| + const std::string& languages,
|
| + base::string16* output);
|
| +
|
| +// Converts the given host name to unicode characters. This can be called for
|
| +// any host name; if the input is not IDN or is invalid in some way, we'll just
|
| +// return the ASCII source so it is still usable.
|
| +//
|
| +// The input should be the canonicalized ASCII host name from GURL. This
|
| +// function does NOT accept UTF-8!
|
| +//
|
| +// |languages| is a comma separated list of ISO 639 language codes. It
|
| +// is used to determine whether a hostname is 'comprehensible' to a user
|
| +// who understands languages listed. |host| will be converted to a
|
| +// human-readable form (Unicode) ONLY when each component of |host| is
|
| +// regarded as 'comprehensible'. Script-mixing is not allowed except that
|
| +// Latin letters in the ASCII range can be mixed with a limited set of
|
| +// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
|
| +// When |languages| is empty, even that mixing is not allowed.
|
| +NET_EXPORT base::string16 IDNToUnicode(const std::string& host,
|
| + const std::string& languages);
|
| +
|
| +} // url_formatter
|
| +
|
| +#endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
|
|
|