| Index: components/url_formatter/url_formatter.h
 | 
| diff --git a/components/url_formatter/url_formatter.h b/components/url_formatter/url_formatter.h
 | 
| new file mode 100644
 | 
| index 0000000000000000000000000000000000000000..01c8795ce0662e0edc465e1505e515923b26173b
 | 
| --- /dev/null
 | 
| +++ b/components/url_formatter/url_formatter.h
 | 
| @@ -0,0 +1,155 @@
 | 
| +// Copyright 2015 The Chromium Authors. All rights reserved.
 | 
| +// Use of this source code is governed by a BSD-style license that can be
 | 
| +// found in the LICENSE file.
 | 
| +
 | 
| +// url_formatter contains routines for formatting URLs in a way that can be
 | 
| +// safely and securely displayed to users. For example, it is responsible
 | 
| +// for determining when to convert an IDN A-Label (e.g. "xn--[something]")
 | 
| +// into the IDN U-Label.
 | 
| +//
 | 
| +// Note that this formatting is only intended for display purposes; it would
 | 
| +// be insecure and insufficient to make comparisons solely on formatted URLs
 | 
| +// (that is, it should not be used for normalizing URLs for comparison for
 | 
| +// security decisions).
 | 
| +
 | 
| +#ifndef COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
 | 
| +#define COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
 | 
| +
 | 
| +#include <stdint.h>
 | 
| +
 | 
| +#include <string>
 | 
| +#include <vector>
 | 
| +
 | 
| +#include "base/strings/string16.h"
 | 
| +#include "base/strings/utf_offset_string_conversions.h"
 | 
| +#include "net/base/escape.h"
 | 
| +
 | 
| +class GURL;
 | 
| +
 | 
| +namespace url {
 | 
| +struct Parsed;
 | 
| +}  // url
 | 
| +
 | 
| +namespace url_formatter {
 | 
| +
 | 
| +// Used by FormatUrl to specify handling of certain parts of the url.
 | 
| +typedef uint32_t FormatUrlType;
 | 
| +typedef uint32_t FormatUrlTypes;
 | 
| +
 | 
| +// Nothing is ommitted.
 | 
| +extern const FormatUrlType kFormatUrlOmitNothing;
 | 
| +
 | 
| +// If set, any username and password are removed.
 | 
| +extern const FormatUrlType kFormatUrlOmitUsernamePassword;
 | 
| +
 | 
| +// If the scheme is 'http://', it's removed.
 | 
| +extern const FormatUrlType kFormatUrlOmitHTTP;
 | 
| +
 | 
| +// Omits the path if it is just a slash and there is no query or ref.  This is
 | 
| +// meaningful for non-file "standard" URLs.
 | 
| +extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
 | 
| +
 | 
| +// Convenience for omitting all unecessary types.
 | 
| +extern const FormatUrlType kFormatUrlOmitAll;
 | 
| +
 | 
| +// Creates a string representation of |url|. The IDN host name may be in Unicode
 | 
| +// if |languages| accepts the Unicode representation. |format_type| is a bitmask
 | 
| +// of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean
 | 
| +// the URL for human readability. You will generally want |UnescapeRule::SPACES|
 | 
| +// for display to the user if you can handle spaces, or |UnescapeRule::NORMAL|
 | 
| +// if not. If the path part and the query part seem to be encoded in %-encoded
 | 
| +// UTF-8, decodes %-encoding and UTF-8.
 | 
| +//
 | 
| +// The last three parameters may be NULL.
 | 
| +//
 | 
| +// |new_parsed| will be set to the parsing parameters of the resultant URL.
 | 
| +//
 | 
| +// |prefix_end| will be the length before the hostname of the resultant URL.
 | 
| +//
 | 
| +// |offset[s]_for_adjustment| specifies one or more offsets into the original
 | 
| +// URL, representing insertion or selection points between characters: if the
 | 
| +// input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is
 | 
| +// between the scheme and the host, and offset 15 is after the end of the URL.
 | 
| +// Valid input offsets range from 0 to the length of the input URL string.  On
 | 
| +// exit, each offset will have been modified to reflect any changes made to the
 | 
| +// output string.  For example, if |url| is "http://a:b@c.com/",
 | 
| +// |omit_username_password| is true, and an offset is 12 (pointing between 'c'
 | 
| +// and '.'), then on return the output string will be "http://c.com/" and the
 | 
| +// offset will be 8.  If an offset cannot be successfully adjusted (e.g. because
 | 
| +// it points into the middle of a component that was entirely removed or into
 | 
| +// the middle of an encoding sequence), it will be set to base::string16::npos.
 | 
| +// For consistency, if an input offset points between the scheme and the
 | 
| +// username/password, and both are removed, on output this offset will be 0
 | 
| +// rather than npos; this means that offsets at the starts and ends of removed
 | 
| +// components are always transformed the same way regardless of what other
 | 
| +// components are adjacent.
 | 
| +base::string16 FormatUrl(const GURL& url,
 | 
| +                         const std::string& languages,
 | 
| +                         FormatUrlTypes format_types,
 | 
| +                         net::UnescapeRule::Type unescape_rules,
 | 
| +                         url::Parsed* new_parsed,
 | 
| +                         size_t* prefix_end,
 | 
| +                         size_t* offset_for_adjustment);
 | 
| +
 | 
| +base::string16 FormatUrlWithOffsets(
 | 
| +    const GURL& url,
 | 
| +    const std::string& languages,
 | 
| +    FormatUrlTypes format_types,
 | 
| +    net::UnescapeRule::Type unescape_rules,
 | 
| +    url::Parsed* new_parsed,
 | 
| +    size_t* prefix_end,
 | 
| +    std::vector<size_t>* offsets_for_adjustment);
 | 
| +
 | 
| +// This function is like those above except it takes |adjustments| rather
 | 
| +// than |offset[s]_for_adjustment|.  |adjustments| will be set to reflect all
 | 
| +// the transformations that happened to |url| to convert it into the returned
 | 
| +// value.
 | 
| +base::string16 FormatUrlWithAdjustments(
 | 
| +    const GURL& url,
 | 
| +    const std::string& languages,
 | 
| +    FormatUrlTypes format_types,
 | 
| +    net::UnescapeRule::Type unescape_rules,
 | 
| +    url::Parsed* new_parsed,
 | 
| +    size_t* prefix_end,
 | 
| +    base::OffsetAdjuster::Adjustments* adjustments);
 | 
| +
 | 
| +// This is a convenience function for FormatUrl() with
 | 
| +// format_types = kFormatUrlOmitAll and unescape = SPACES.  This is the typical
 | 
| +// set of flags for "URLs to display to the user".  You should be cautious about
 | 
| +// using this for URLs which will be parsed or sent to other applications.
 | 
| +inline base::string16 FormatUrl(const GURL& url, const std::string& languages) {
 | 
| +  return FormatUrl(url, languages, kFormatUrlOmitAll, net::UnescapeRule::SPACES,
 | 
| +                   nullptr, nullptr, nullptr);
 | 
| +}
 | 
| +
 | 
| +// Returns whether FormatUrl() would strip a trailing slash from |url|, given a
 | 
| +// format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
 | 
| +bool CanStripTrailingSlash(const GURL& url);
 | 
| +
 | 
| +// Formats the host in |url| and appends it to |output|.  The host formatter
 | 
| +// takes the same accept languages component as ElideURL().
 | 
| +void AppendFormattedHost(const GURL& url,
 | 
| +                         const std::string& languages,
 | 
| +                         base::string16* output);
 | 
| +
 | 
| +// Converts the given host name to unicode characters. This can be called for
 | 
| +// any host name, if the input is not IDN or is invalid in some way, we'll just
 | 
| +// return the ASCII source so it is still usable.
 | 
| +//
 | 
| +// The input should be the canonicalized ASCII host name from GURL. This
 | 
| +// function does NOT accept UTF-8!
 | 
| +//
 | 
| +// |languages| is a comma separated list of ISO 639 language codes. It
 | 
| +// is used to determine whether a hostname is 'comprehensible' to a user
 | 
| +// who understands languages listed. |host| will be converted to a
 | 
| +// human-readable form (Unicode) ONLY when each component of |host| is
 | 
| +// regarded as 'comprehensible'. Scipt-mixing is not allowed except that
 | 
| +// Latin letters in the ASCII range can be mixed with a limited set of
 | 
| +// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
 | 
| +// When |languages| is empty, even that mixing is not allowed.
 | 
| +base::string16 IDNToUnicode(const std::string& host,
 | 
| +                            const std::string& languages);
 | 
| +
 | 
| +}  // url_formatter
 | 
| +
 | 
| +#endif  // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
 | 
| 
 |