OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef NET_BASE_ESCAPE_H_ | |
6 #define NET_BASE_ESCAPE_H_ | |
7 | |
8 #include <string> | |
9 #include <vector> | |
10 | |
11 #include "base/basictypes.h" | |
12 #include "base/strings/string16.h" | |
13 #include "base/strings/utf_offset_string_conversions.h" | |
14 #include "net/base/net_export.h" | |
15 | |
16 namespace net { | |
17 | |
18 // Escaping -------------------------------------------------------------------- | |
19 | |
20 // Escapes characters in |text| so it is suitable for use as a query parameter | |
21 // value. We %XX everything except alphanumerics and -_.!~*'() | |
22 // Spaces change to "+" unless you pass |use_plus| as false. | |
23 // This is basically the same as encodeURIComponent in JavaScript. | |
24 NET_EXPORT std::string EscapeQueryParamValue(const std::string& text, | |
25 bool use_plus); | |
26 | |
27 // Escapes a partial or complete file/pathname given in |path|. This includes: | |
28 // non-printable, non-7bit, and (including space) "#%:<>?[\]^`{|} | |
29 // NOTE(review): text about a base::string16 version (conversion to |codepage|, | |
30 // returning false on failure) matches no declaration visible here; confirm. | |
31 NET_EXPORT std::string EscapePath(const std::string& path); | |
32 | |
33 #if defined(OS_MACOSX) | |
34 // Mac-only (OS_MACOSX). Escapes |precursor| as NSURL expects. This includes: | |
35 // non-printable, non-7bit, and (including space) "#%<>[\]^`{|} | |
36 NET_EXPORT std::string EscapeNSURLPrecursor(const std::string& precursor); | |
37 #endif // defined(OS_MACOSX) | |
38 | |
39 // Escapes application/x-www-form-urlencoded content. This includes: | |
40 // non-printable, non-7bit, and (including space) ?>=<;+'&%$#"![\]^`{|} | |
41 // Space is escaped as + (if |use_plus| is true) and other special characters | |
42 // as %XX (hex). The content to escape is passed in |path|. | |
43 NET_EXPORT std::string EscapeUrlEncodedData(const std::string& path, | |
44 bool use_plus); | |
45 | |
46 // Escapes all non-ASCII characters in |input| and returns the result. | |
47 NET_EXPORT std::string EscapeNonASCII(const std::string& input); | |
48 | |
49 // Escapes characters in |text| suitable for use as an external protocol | |
50 // handler command. | |
51 // We %XX everything except alphanumerics and -_.!~*'() and the restricted | |
52 // characters (;/?:@&=+$,#[]) and a valid percent escape sequence (%XX). | |
53 NET_EXPORT std::string EscapeExternalHandlerValue(const std::string& text); | |
54 | |
55 // Appends the given character |c| to |output|, escaping the character if | |
56 // the character would be interpreted as an HTML delimiter. | |
57 NET_EXPORT void AppendEscapedCharForHTML(char c, std::string* output); | |
58 | |
59 // Escapes chars that might cause |text| to be interpreted as HTML tags. | |
60 NET_EXPORT std::string EscapeForHTML(const std::string& text); | |
61 NET_EXPORT base::string16 EscapeForHTML(const base::string16& text); | |
62 | |
63 // Unescaping ------------------------------------------------------------------ | |
64 | |
65 class UnescapeRule { | |
66 public: | |
67 // Bitmask type: a combination of the flags below, passed as the |rules| | |
68 // argument of the unescaping functions. | |
69 typedef uint32 Type; | |
70 | |
71 enum { | |
72 // Don't unescape anything at all. | |
73 NONE = 0, | |
74 | |
75 // Don't unescape anything special, but all normal unescaping will happen. | |
76 // This is a placeholder and can't be combined with other flags (since it's | |
77 // just the absence of them). All other unescape rules imply "normal" in | |
78 // addition to their special meaning. Things like escaped letters, digits, | |
79 // and most symbols will get unescaped with this mode. | |
80 NORMAL = 1, | |
81 | |
82 // Convert %20 to spaces. In some places where we're showing URLs, we may | |
83 // want this. In places where the URL may be copied and pasted out, then | |
84 // you wouldn't want this since it might not be interpreted in one piece | |
85 // by other applications. | |
86 SPACES = 2, | |
87 | |
88 // Unescapes various characters that will change the meaning of URLs, | |
89 // including '%', '+', '&', '/', '#'. If we unescaped these characters, the | |
90 // resulting URL won't be the same as the source one. This flag is used when | |
91 // generating final output like filenames for URLs where we won't be | |
92 // interpreting as a URL and want to do as much unescaping as possible. | |
93 URL_SPECIAL_CHARS = 4, | |
94 | |
95 // Unescapes control characters such as %01. This INCLUDES NULLs. This is | |
96 // used for rare cases such as data: URL decoding where the result is binary | |
97 // data. This flag also unescapes BiDi control characters. | |
98 // | |
99 // DO NOT use CONTROL_CHARS if the URL is going to be displayed in the UI | |
100 // for security reasons. | |
101 CONTROL_CHARS = 8, | |
102 | |
103 // URL queries use "+" for space. This flag controls that replacement. | |
104 REPLACE_PLUS_WITH_SPACE = 16, | |
105 }; | |
106 }; | |
107 | |
108 // Unescapes |escaped_text| according to |rules| and returns the result. | |
109 // Unescaping consists of looking for the exact pattern "%XX", where each X is | |
110 // a hex digit, and converting to the character with the numerical value of | |
111 // those digits. Thus "i%20=%203%3b" unescapes to "i = 3;". | |
112 // | |
113 // Watch out: this doesn't necessarily result in the correct final result, | |
114 // because the encoding may be unknown. For example, the input might be ASCII, | |
115 // which, after unescaping, is supposed to be interpreted as UTF-8, and then | |
116 // converted into full UTF-16 chars. This function won't tell you if any | |
117 // conversions need to take place, it only unescapes. | |
118 NET_EXPORT std::string UnescapeURLComponent(const std::string& escaped_text, | |
119 UnescapeRule::Type rules); | |
120 NET_EXPORT base::string16 UnescapeURLComponent( | |
121 const base::string16& escaped_text, | |
122 UnescapeRule::Type rules); | |
123 | |
124 // Unescapes the given substring as a URL, and then tries to interpret the | |
125 // result as being encoded as UTF-8. If the result is convertible into UTF-8, it | |
126 // will be returned as converted. If it is not, the original escaped string will | |
127 // be converted into a base::string16 and returned. In the WithAdjustments | |
128 // variant, |adjustments| provides information on how the original string was | |
129 // adjusted to get the string returned. | |
130 NET_EXPORT base::string16 UnescapeAndDecodeUTF8URLComponent( | |
131 const std::string& text, | |
132 UnescapeRule::Type rules); | |
133 NET_EXPORT base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( | |
134 const std::string& text, | |
135 UnescapeRule::Type rules, | |
136 base::OffsetAdjuster::Adjustments* adjustments); | |
137 | |
138 // Unescapes the ampersand character codes (HTML entities) for the following | |
139 // characters in |text|: < > & " ' | |
140 NET_EXPORT base::string16 UnescapeForHTML(const base::string16& text); | |
141 | |
142 } // namespace net | |
143 | |
144 #endif // NET_BASE_ESCAPE_H_ | |
OLD | NEW |