Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(303)

Unified Diff: components/url_formatter/url_formatter.h

Issue 1171333003: Move net::FormatUrl and friends outside of //net and into //components (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase again now that CQ is fixed Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/url_formatter/url_fixer_unittest.cc ('k') | components/url_formatter/url_formatter.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/url_formatter/url_formatter.h
diff --git a/components/url_formatter/url_formatter.h b/components/url_formatter/url_formatter.h
new file mode 100644
index 0000000000000000000000000000000000000000..01c8795ce0662e0edc465e1505e515923b26173b
--- /dev/null
+++ b/components/url_formatter/url_formatter.h
@@ -0,0 +1,155 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// url_formatter contains routines for formatting URLs in a way that can be
+// safely and securely displayed to users. For example, it is responsible
+// for determining when to convert an IDN A-Label (e.g. "xn--[something]")
+// into the IDN U-Label.
+//
+// Note that this formatting is only intended for display purposes; it would
+// be insecure and insufficient to make comparisons solely on formatted URLs
+// (that is, it should not be used for normalizing URLs for comparison for
+// security decisions).
+
+#ifndef COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
+#define COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "base/strings/string16.h"
+#include "base/strings/utf_offset_string_conversions.h"
+#include "net/base/escape.h"
+
+class GURL;
+
+namespace url {
+struct Parsed;
+} // url
+
+namespace url_formatter {
+
+// Used by FormatUrl to specify handling of certain parts of the url.
+typedef uint32_t FormatUrlType;
+typedef uint32_t FormatUrlTypes;
+
+// Nothing is omitted.
+extern const FormatUrlType kFormatUrlOmitNothing;
+
+// If set, any username and password are removed.
+extern const FormatUrlType kFormatUrlOmitUsernamePassword;
+
+// If the scheme is 'http://', it's removed.
+extern const FormatUrlType kFormatUrlOmitHTTP;
+
+// Omits the path if it is just a slash and there is no query or ref. This is
+// meaningful for non-file "standard" URLs.
+extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
+
+// Convenience for omitting all unnecessary types.
+extern const FormatUrlType kFormatUrlOmitAll;
+
+// Creates a string representation of |url|. The IDN host name may be in Unicode
+// if |languages| accepts the Unicode representation. |format_types| is a
+// bitmask of FormatUrlType flags (see above). |unescape_rules| defines how to
+// clean the URL for human readability. You will generally want
+// |UnescapeRule::SPACES| for display to the user if you can handle spaces, or
+// |UnescapeRule::NORMAL| if not. If the path part and the query part seem to be
+// encoded in %-encoded UTF-8, decodes %-encoding and UTF-8.
+//
+// The last three parameters may be NULL.
+//
+// |new_parsed| will be set to the parsing parameters of the resultant URL.
+//
+// |prefix_end| will be the length before the hostname of the resultant URL.
+//
+// |offset[s]_for_adjustment| specifies one or more offsets into the original
+// URL, representing insertion or selection points between characters: if the
+// input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is
+// between the scheme and the host, and offset 15 is after the end of the URL.
+// Valid input offsets range from 0 to the length of the input URL string. On
+// exit, each offset will have been modified to reflect any changes made to the
+// output string. For example, if |url| is "http://a:b@c.com/",
+// kFormatUrlOmitUsernamePassword is set, and an offset is 12 (between 'c' and
+// '.'), then on return the output string will be "http://c.com/" and the
+// offset will be 8. If an offset cannot be successfully adjusted (e.g. because
+// it points into the middle of a component that was entirely removed or into
+// the middle of an encoding sequence), it will be set to base::string16::npos.
+// For consistency, if an input offset points between the scheme and the
+// username/password, and both are removed, on output this offset will be 0
+// rather than npos; this means that offsets at the starts and ends of removed
+// components are always transformed the same way regardless of what other
+// components are adjacent.
+base::string16 FormatUrl(const GURL& url,
+ const std::string& languages,
+ FormatUrlTypes format_types,
+ net::UnescapeRule::Type unescape_rules,
+ url::Parsed* new_parsed,
+ size_t* prefix_end,
+ size_t* offset_for_adjustment);
+
+base::string16 FormatUrlWithOffsets(
+ const GURL& url,
+ const std::string& languages,
+ FormatUrlTypes format_types,
+ net::UnescapeRule::Type unescape_rules,
+ url::Parsed* new_parsed,
+ size_t* prefix_end,
+ std::vector<size_t>* offsets_for_adjustment);
+
+// This function is like those above except it takes |adjustments| rather
+// than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all
+// the transformations that happened to |url| to convert it into the returned
+// value.
+base::string16 FormatUrlWithAdjustments(
+ const GURL& url,
+ const std::string& languages,
+ FormatUrlTypes format_types,
+ net::UnescapeRule::Type unescape_rules,
+ url::Parsed* new_parsed,
+ size_t* prefix_end,
+ base::OffsetAdjuster::Adjustments* adjustments);
+
+// Convenience overload: calls the seven-argument FormatUrl() above with
+// kFormatUrlOmitAll, UnescapeRule::SPACES, and nullptr for the three output
+// parameters -- the typical settings for URLs displayed to the user. Be wary of
+// using this for URLs which will be parsed or sent to other applications.
+inline base::string16 FormatUrl(const GURL& url, const std::string& languages) {
+ return FormatUrl(url, languages, kFormatUrlOmitAll, net::UnescapeRule::SPACES,
+ nullptr, nullptr, nullptr);
+}
+
+// Returns whether FormatUrl() would strip a trailing slash from |url|, given a
+// format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
+bool CanStripTrailingSlash(const GURL& url);
+
+// Formats the host in |url| and appends it to |output|. The host formatter
+// takes the same accept languages component as ElideURL().
+void AppendFormattedHost(const GURL& url,
+ const std::string& languages,
+ base::string16* output);
+
+// Converts the given host name to unicode characters. This can be called for
+// any host name; if the input is not IDN or is invalid in some way, we'll just
+// return the ASCII source so it is still usable.
+//
+// The input should be the canonicalized ASCII host name from GURL. This
+// function does NOT accept UTF-8!
+//
+// |languages| is a comma separated list of ISO 639 language codes. It
+// is used to determine whether a hostname is 'comprehensible' to a user
+// who understands languages listed. |host| will be converted to a
+// human-readable form (Unicode) ONLY when each component of |host| is
+// regarded as 'comprehensible'. Script-mixing is not allowed except that
+// Latin letters in the ASCII range can be mixed with a limited set of
+// script-language pairs (currently Han, Kana and Hangul for zh, ja and ko).
+// When |languages| is empty, even that mixing is not allowed.
+base::string16 IDNToUnicode(const std::string& host,
+ const std::string& languages);
+
+} // url_formatter
+
+#endif // COMPONENTS_URL_FORMATTER_URL_FORMATTER_H_
« no previous file with comments | « components/url_formatter/url_fixer_unittest.cc ('k') | components/url_formatter/url_formatter.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698